* Add: fuzzer files and modifications in config & compil * add configure.ac change * add minimize-corpus.sh * add fuzzing directory and readme * add to check if CC support libfuzzer * Make workflow dump the crash POC * Add debugging information * Run fuzzing only once a week for now Co-authored-by: kmamadoudram <[email protected]> Co-authored-by: yocvito <[email protected]> Co-authored-by: Samuel Thibault <[email protected]>

3 years ago · 1f76c4b8bd
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -22,21 +22,21 @@ jobs:
        include:
          - sanitizer: "no"
            deps: ""
            configflags: ""
            configflags: "-g -Og -fno-omit-frame-pointer"
            config: ""
            build_env: ""
            check_env: ""

          - sanitizer: "address"
            deps: ""
            configflags: "-fsanitize=address -g -Og"
            configflags: "-fsanitize=address -g -Og -fno-omit-frame-pointer"
            config: ""
            build_env: "ASAN_OPTIONS=detect_leaks=0"
            check_env: "ASAN_OPTIONS=detect_leaks=0"

          - sanitizer: "leak"
            deps: ""
            configflags: '-fsanitize=leak -g -Og'
            configflags: '-fsanitize=leak -g -Og -fno-omit-frame-pointer'
            config: ""
            build_env: "LSAN_OPTIONS=fast_unwind_on_malloc=0"
            check_env: "LSAN_OPTIONS=fast_unwind_on_malloc=0"
@@ -44,28 +44,28 @@ jobs:
          - sanitizer: "memory"
            deps: "clang"
            configenv: "CC=clang CXX=clang++"
            configflags: "-fsanitize=memory -fsanitize-memory-track-origins=2 -g -Og"
            configflags: "-fsanitize=memory -fsanitize-memory-track-origins=2 -g -Og -fno-omit-frame-pointer"
            config: "--without-pcaudiolib"
            build_env: "MSAN_OPTIONS=exitcode=42"
            check_env: "MSAN_OPTIONS=exitcode=42"

          - sanitizer: "thread"
            deps: ""
            configflags: "-fsanitize=thread -g -Og"
            configflags: "-fsanitize=thread -g -Og -fno-omit-frame-pointer"
            config: ""
            build_env: ""
            check_env: ""

          - sanitizer: "undefined"
            deps: ""
            configflags: "-fsanitize=undefined -g -Og"
            configflags: "-fsanitize=undefined -g -Og -fno-omit-frame-pointer"
            config: ""
            build_env: "UBSAN_OPTIONS=halt_on_error=1"
            check_env: "UBSAN_OPTIONS=halt_on_error=1"

          - sanitizer: "valgrind"
            deps: "libtool-bin valgrind"
            configflags: '-g -Og'
            configflags: '-g -Og -fno-omit-frame-pointer'
            config: ""
            build_env: 'VALGRIND="libtool --mode=execute valgrind --track-origins=yes --leak-check=full --error-exitcode=1" '
            check_env: 'VALGRIND="libtool --mode=execute valgrind --track-origins=yes --leak-check=full --error-exitcode=1" '
--- a/.github/workflows/fuzzing.yml
+++ b/.github/workflows/fuzzing.yml
@@ -0,0 +1,64 @@
 name: fuzzing

 # Scheduled libFuzzer run of the synth_espeak fuzz target, once per
 # (architecture, language) pair. It can also be started by hand.
 on:
  workflow_dispatch:
  schedule:
    # Mondays at 04:00 UTC.
    - cron: "0 4 * * 1"

 jobs:
  fuzzing:

    runs-on: ubuntu-latest
    name: Fuzz synth_espeak on ${{ matrix.arch }} for ${{ matrix.lang }}
    strategy:
      # Keep fuzzing the remaining combinations when one of them crashes.
      fail-fast: false
      matrix:
        arch: [x86-32, x86-64]
        # "no" is quoted on purpose: unquoted it is a boolean in YAML 1.1.
        lang: [af, am, an, ar, as, az, ba, be, bg, bn, bpy, bs, ca, chr, cmn, cs, cv, cy, da, de, el, en, eo, es, et, eu, fa, fi, fr, ga, gd, gn, grc, gu, hak, haw, he, hi, hr, ht, hu, hy, ia, id, io, is, it, ja, jbo, ka, kk, kl, kn, ko, kok, ku, ky, la, lb, lfn, lt, lv, mi, mk, ml, mr, ms, mt, my, nci, ne, nl, "no", nog, om, or, pa, pap, piqd, pl, pt, py, qdb, qu, quc, qya, ro, ru, sd, shn, si, sjn, sk, sl, smj, sq, sr, sv, sw, ta, te, th, tk, tn, tr, tt, ug, uk, ur, uz, vi, yue]
        include:
          - arch: x86-32
            archdeps: "gcc-multilib g++-multilib libpcaudio-dev:i386 libsonic-dev:i386 libc6-dbg:i386"
            archconfigflags: "-m32"

          - arch: x86-64
            archdeps: ""
            archconfigflags: ''
    steps:
    - uses: actions/checkout@v4
    - name: enable 32bit architecture
      run: sudo dpkg --add-architecture i386
      if: matrix.arch == 'x86-32'
    - name: dependencies
      run: sudo apt-get update && sudo apt-get install libpcaudio-dev libsonic-dev ronn kramdown clang llvm ${{ matrix.archdeps }}
    - name: autoconf
      run: ./autogen.sh ; chmod -x INSTALL m4/*.m4
    - name: configure
      # Folded scalar: the lines below are joined with spaces into one command.
      run: >-
        CC=clang CXX=clang++
        CFLAGS="${{ matrix.archconfigflags }} -fsanitize=address,undefined -fstack-protector-strong -g -Og -fno-omit-frame-pointer"
        CXXFLAGS="${{ matrix.archconfigflags }} -fsanitize=address,undefined -fstack-protector-strong -g -Og -fno-omit-frame-pointer"
        LDFLAGS="-fsanitize=address,undefined -lubsan"
        ./configure --with-libfuzzer
    - name: Store the fuzzer config
      if: ${{ failure() }}
      uses: actions/upload-artifact@v4
      with:
        name: config-${{ matrix.arch }}-${{ matrix.lang }}.log
        path: config.log
    - name: make
      # Bound the parallelism to the runner's CPU count instead of an unlimited -j.
      run: make -j"$(nproc)"
    - name: Fuzz function synth_espeak()
      run: |
        mkdir -p tests/fuzzing/CORPUS_DIR
        FUZZ_VOICE=${{ matrix.lang }} tests/fuzzing/synth_fuzzer.test -seed=1 -runs=10000 -max_len=4096 tests/fuzzing/CORPUS_DIR
    - name: Store the crash POC
      if: ${{ failure() }}
      uses: actions/upload-artifact@v4
      with:
        name: crash-${{ matrix.arch }}-${{ matrix.lang }}.1
        path: crash-*
    - name: Fuzz function synth_espeak() with language-specific input
      # Second pass: seed the corpus with the language's dictionary sources.
      run: |
        cp dictsource/${{ matrix.lang }}_* tests/fuzzing/CORPUS_DIR/
        FUZZ_VOICE=${{ matrix.lang }} tests/fuzzing/synth_fuzzer.test -seed=1 -runs=10000 -max_len=4096 tests/fuzzing/CORPUS_DIR
    - name: Store the crash POC
      if: ${{ failure() }}
      uses: actions/upload-artifact@v4
      with:
        name: crash-${{ matrix.arch }}-${{ matrix.lang }}.2
        path: crash-*
--- a/.gitignore
+++ b/.gitignore
@@ -123,10 +123,19 @@ espeak-ng.pc
 espeak-ng-*.tar.gz
 espeak-ng-*.*/

 # /tests/fuzzing/
 /tests/fuzzing/crash-*
 /tests/fuzzing/oom-*
 /tests/fuzzing/leak-*
 /tests/fuzzing/fuzz-*.log
 /tests/fuzzing/*.profdata
 /tests/fuzzing/*.profraw
 /tests/fuzzing/.deps/*.Po
 /tests/fuzzing/.dirstamp
 !tests/fuzzing/CORPUS*/*.txt
 # Windows builds

 src/pcaudiolib/

 !src/windows/config.h

 *.obj
--- a/Makefile.am
+++ b/Makefile.am
@@ -18,6 +18,12 @@ MKDIR=mkdir -p
 AM_CFLAGS = \
 	-Isrc/include -Isrc/include/compat -I$(srcdir)/src/speechPlayer/include -I$(srcdir)/src/ucd-tools/src/include \
 	-D_BSD_SOURCE -D_DEFAULT_SOURCE -D_POSIX_C_SOURCE=200112L
 AM_CXXFLAGS =

 if USE_COVERAGE
 AM_CFLAGS += -fprofile-instr-generate -fcoverage-mapping
 AM_CXXFLAGS += -fprofile-instr-generate -fcoverage-mapping
 endif

 EXTRA_DIST=
 CLEANFILES = dictsource/ru_listx dictsource/cmn_listx dictsource/yue_listx
@@ -321,6 +327,8 @@ tests_libfuzzrunner_la_CFLAGS  = -Isrc/libespeak-ng ${AM_CFLAGS}
 tests_libfuzzrunner_la_SOURCES = tests/fuzzrunner.c
 endif



 check_PROGRAMS += tests/ssml-fuzzer.test
 tests_ssml_fuzzer_test_CFLAGS  = ${AM_CFLAGS}
 tests_ssml_fuzzer_test_SOURCES = tests/ssml-fuzzer.c
@@ -333,9 +341,16 @@ tests_readclause_test_SOURCES += tests/dummy.cpp
 tests_ssml_fuzzer_test_SOURCES += tests/dummy.cpp
 endif


 # With libFuzzer: build the SSML fuzzer with -fsanitize=fuzzer and also build
 # the synth fuzzer. Without it: link the SSML fuzzer against the
 # tests/libfuzzrunner.la fallback instead.
 if HAVE_LIBFUZZER
 tests_ssml_fuzzer_test_CFLAGS += -fsanitize=fuzzer
 tests_ssml_fuzzer_test_LDFLAGS = -fsanitize=fuzzer

 # synth_fuzzer.test is built from tests/fuzzing/synth_fuzzer.c and linked
 # against src/libespeak-ng.la. PATH_ESPEAK_DATA is compiled in as the
 # espeak-ng-data directory of the source tree (absolute path).
 noinst_PROGRAMS =  tests/fuzzing/synth_fuzzer.test
 tests_fuzzing_synth_fuzzer_test_SOURCES = tests/fuzzing/synth_fuzzer.c
 tests_fuzzing_synth_fuzzer_test_LDADD =  src/libespeak-ng.la
 tests_fuzzing_synth_fuzzer_test_CFLAGS=  ${AM_CFLAGS} -fsanitize=fuzzer -DPATH_ESPEAK_DATA=\"$(abs_top_srcdir)/espeak-ng-data\" -Isrc/libespeak-ng
 tests_fuzzing_synth_fuzzer_test_LDFLAGS= ${AM_LDFLAGS} -fsanitize=fuzzer -static -lm -Wl,-z,relro ${PCAUDIOLIB_LIBS}
 else
 tests_ssml_fuzzer_test_LDADD += tests/libfuzzrunner.la
 endif
--- a/configure.ac
+++ b/configure.ac
@@ -64,6 +64,10 @@ AC_ARG_WITH([libfuzzer],
    [AS_HELP_STRING([--with-libfuzzer], [enable libFuzzer in the fuzzer tests @<:@default=no@:>@])],
    [])

 AC_ARG_WITH([coverage],
    [AS_HELP_STRING([--with-coverage], [enable clang coverage in the fuzzer tests (also add coverage to lib sources) @<:@default=no@:>@])],
    [])

 dnl ================================================================
 dnl Program checks.
 dnl ================================================================
@@ -131,6 +135,19 @@ else
 	AC_MSG_ERROR([C99 is not supported by $CC.])
 fi


 dnl Probe whether the C compiler accepts -fsanitize=fuzzer. This is a
 dnl compile-only check of an empty program; the flag is appended to CFLAGS
 dnl for the probe only and the saved value is restored afterwards.
 dnl The outcome is stored in have_fuzzer_fuzzer as yes or no.
 AC_LANG_PUSH(C)
 TEMP_CFLAGS="$CFLAGS"
 CFLAGS="$CFLAGS -fsanitize=fuzzer"
 AC_MSG_CHECKING([if $CC supports fuzzer with the -fsanitize=fuzzer flag])
 AC_COMPILE_IFELSE(
 	[AC_LANG_PROGRAM( [[]], [[]])],
 	[have_fuzzer_fuzzer=yes],
 	[have_fuzzer_fuzzer=no])
 AC_MSG_RESULT($have_fuzzer_fuzzer)
 CFLAGS="$TEMP_CFLAGS"
 AC_LANG_POP(C)

 dnl ================================================================
 dnl FreeBSD check.
 dnl ================================================================
@@ -330,7 +347,19 @@ else
 	have_libfuzzer=no
 fi

 AM_CONDITIONAL(HAVE_LIBFUZZER, [test x"$have_libfuzzer" = xyes])
 AM_CONDITIONAL(HAVE_LIBFUZZER, [test x"$have_libfuzzer" = xyes  -a x"$have_fuzzer_fuzzer" = xyes])

 dnl ================================================================
 dnl clang-coverage checks.
 dnl ================================================================

 if test "$with_coverage" = "yes" ; then
 	use_coverage=yes
 else
 	use_coverage=no
 fi

 AM_CONDITIONAL(USE_COVERAGE, [test x"$use_coverage" = xyes])

 dnl ================================================================
 dnl Generate output.
--- a/tests/fuzzing/README.md
+++ b/tests/fuzzing/README.md
@@ -0,0 +1,61 @@
 All fuzzers here are run continuously through OSS-fuzz.

 Link to OSS-fuzz integration: Pending

 # Translation fuzzers

 Currently, there is one fuzzer for the synthesizer, **synth_fuzzer**, which targets **espeak_Synth**. The following sections explain how to configure the fuzzers, how to use them, and how to get a coverage report of the fuzzing results.

 ## Configure the project for fuzzing

 We have added some switches to configure.ac for fuzzing and coverage. The `--with-libfuzzer` switch checks that you are actually using clang and clang++ as compilers (by looking at CC and CXX) and enables generation of the build rules for the fuzzer targets. The `--with-coverage` switch adds `-fprofile-instr-generate -fcoverage-mapping` to AM_CFLAGS and AM_CXXFLAGS in Makefile.am.

 To configure and build the project with coverage and fuzzer support:
 ```
 ./autogen.sh
 CC=clang CXX=clang++ ./configure --with-coverage --with-libfuzzer
 make -j8
 ```

 ## Run the fuzzers

 You are now able to run the fuzzer, which needs two parameters. First, choose a language to fuzz and set the `FUZZ_VOICE` environment variable to it. Then, provide a corpus directory with files containing sample inputs that libfuzzer will use to craft the data passed to the fuzzing function (the idea is to keep the corpus as minimal as possible). If you don't provide any corpus directory, libfuzzer just generates random inputs.

 Here is how you can start fuzzing  `espeak_Synth` function.
 ```

 # first we move to the tests/fuzzing directory
 cd tests/fuzzing
 # to get interesting files in the corpus, a simple python script generates one from the dictionary sources
 ./create_dict_corpus_file.py -c CORPUS/

 # we assume here that you have added corpus files to the tests/fuzzing/CORPUS directory
 FUZZ_VOICE=en ./synth_fuzzer.test CORPUS/

 # to run the fuzzer using parallelization
 # you can even set more jobs than workers (the ones that just stopped will be instantly replaced by a new fuzzer process)
 FUZZ_VOICE=en ./synth_fuzzer.test CORPUS/ -workers=8 -jobs=8
 ```
 After running the fuzzer multiple times with the same corpus directory, many of the corpus files added by the fuzzer may explore the same paths. Fortunately, libfuzzer allows you to minimize a corpus. There is a simple bash script in tests/fuzzing that does that.
 ```
 ./minimize-corpus.sh CORPUS/


 # if you have added a POC file to the corpus directory and you want to keep it intact, change its extension to .txt and use the --preserve-txt switch, which leaves .txt files in the directory untouched
 ./minimize-corpus.sh --preserve-txt CORPUS/
 ```
 ## Look at fuzzer coverage

 If you want to see which parts of the source code are explored by the fuzzer, you can use clang coverage. To do so, configure with the coverage switch, run the fuzzer, and display the coverage data from the run with the llvm tools.
 To be able to use the coverage data, you first need to compile the raw profile data file of the run. By default, this file is created after execution under the name default.profraw, but you can choose another name with `LLVM_PROFILE_FILE`.
 Here is how to do that.
 ```
 LLVM_PROFILE_FILE=synth_fuzzer.profraw FUZZ_VOICE=en ./synth_fuzzer.test CORPUS/ -workers=8 -jobs=8

 # wait for a bit and press CTRL+C

 # compile raw profile
 llvm-profdata merge -sparse synth_fuzzer.profraw -o synth_fuzzer.profdata

 # show coverage (each line is shown with its execution count; regions that were never executed are highlighted in red)
 llvm-cov show ./synth_fuzzer.test -instr-profile=synth_fuzzer.profdata
 ```
--- a/tests/fuzzing/create_dict_corpus_file.py
+++ b/tests/fuzzing/create_dict_corpus_file.py
@@ -0,0 +1,68 @@
 #!/bin/python3
 # /*
 #  * Copyright (C) 2022 Anna Stan ,  Mamaodou Dramé Kalilou , Nicolas Morel
 #  *
 #  * This program is free software; you can redistribute it and/or modify
 #  * it under the terms of the GNU General Public License as published by
 #  * the Free Software Foundation; either version 3 of the License, or
 #  * (at your option) any later version.
 #  *
 #  * This program is distributed in the hope that it will be useful,
 #  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 #  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 #  * GNU General Public License for more details.
 #  *
 #  * You should have received a copy of the GNU General Public License
 #  * along with this program; if not, see: <http://www.gnu.org/licenses/>.
 #  */
 import sys
 import mmap
 import argparse
 import shutil
 import os
 from os import O_RDONLY, O_RDWR, O_WRONLY, O_TRUNC, O_CREAT, SEEK_END, SEEK_CUR, SEEK_SET

 def main(argc, argv):
   """Generate a libFuzzer corpus file from an espeak-ng dictionary list.

   Reads ``../../dictsource/<voice>_list`` (relative to the current working
   directory), where ``<voice>`` comes from the ``FUZZ_VOICE`` environment
   variable and defaults to ``en``. For every non-comment, non-blank line it
   writes ``kw<N>=<first word>`` to ``<voice>_list_dict_corpus.txt`` inside the
   corpus directory given with ``-c``/``--corpus_dir``.

   Args:
     argc: number of entries in ``argv``.
     argv: full argument vector, program name first.
   """
   if argc < 2:
     print('Summary: add file to the corpus ', file=sys.stderr)
     print(f'Usage: {argv[0]} -c <corpus_dir>', file=sys.stderr)
     sys.exit(1)

   ap = argparse.ArgumentParser()
   ap.add_argument("-c", "--corpus_dir", required=True,
                   help="corpus directory where to add the file")
   # Parse the vector we were handed rather than implicitly re-reading sys.argv.
   args = vars(ap.parse_args(argv[1:]))

   voice = os.getenv("FUZZ_VOICE")
   dict_name = (voice if voice else "en") + "_list"

   # os.path.join copes with a corpus directory given without a trailing slash.
   output_path = os.path.join(args['corpus_dir'], dict_name + "_dict_corpus.txt")
   source_path = "../../dictsource/" + dict_name

   # The source is opened first so a missing dictionary does not leave an
   # empty corpus file behind; both handles are closed even on error.
   with open(source_path, "r") as source, open(output_path, "w") as output:
     index = 1
     for line in source:
       # Dictionary comments start with "//"; startswith is safe on short lines.
       if line.startswith("//"):
         continue
       fields = line.split()
       if fields:
         output.write(f"kw{index}={fields[0]}\n")
         index += 1


 if __name__ == "__main__":
   main(len(sys.argv), sys.argv)
--- a/tests/fuzzing/minimize-corpus.sh
+++ b/tests/fuzzing/minimize-corpus.sh
@@ -0,0 +1,36 @@
 #!/bin/bash

 # Minimize a libFuzzer corpus in place.
 #
 # The corpus is first merged into a temporary directory with the fuzzer's
 # -merge=1 mode. Only if that succeeds are the old corpus files removed and
 # replaced by the merged ones. With --preserve-txt, *.txt files in the corpus
 # directory are left untouched.
 #
 # Environment:
 #   FUZZ_VOICE  voice used for the merge run (default: en)
 #   FUZZER      fuzzer binary (default: ./synth_fuzzer if present and
 #               executable, otherwise ./synth_fuzzer.test)

 usage() {
  echo "Usage: $0 <corpus-dir>"
  echo "Usage: $0 --preserve-txt <corpus-dir> (minimize corpus but keep .txt files intact)"
 }

 if [[ $# -lt 1 ]]
 then
  usage
  exit 1
 fi

 preserve_txt=0
 if [[ "$1" == "--preserve-txt" ]]
 then
  preserve_txt=1
  shift
 fi
 CORPUS_DIR=${1:-}

 # Refuse to continue without a real directory: everything below deletes
 # files relative to this path.
 if [[ -z "$CORPUS_DIR" || ! -d "$CORPUS_DIR" ]]
 then
  echo "Error: corpus directory '$CORPUS_DIR' does not exist" >&2
  usage >&2
  exit 1
 fi

 export FUZZ_VOICE="${FUZZ_VOICE:-en}"
 if [[ -z "${FUZZER:-}" ]]
 then
  if [[ -x ./synth_fuzzer ]]
  then
   FUZZER=./synth_fuzzer
  else
   FUZZER=./synth_fuzzer.test
  fi
 fi

 TMP_DIR=$(mktemp -d) || exit 1
 # Always clean up the scratch directory, whatever the exit path.
 trap 'rm -rf -- "$TMP_DIR"' EXIT

 echo "Merging..."
 # Run the fuzzer directly; never feed its output back to the shell.
 if ! "$FUZZER" -merge=1 "$TMP_DIR" "$CORPUS_DIR"
 then
  echo "Error: merge failed, corpus left untouched" >&2
  exit 1
 fi

 echo "Removing old files..."
 if [[ $preserve_txt -eq 1 ]]
 then
  echo " => Preserve .txt files"
  find "$CORPUS_DIR" -mindepth 1 -type f ! -name '*.txt' -exec rm -vf -- {} +
 else
  rm -rf -- "$CORPUS_DIR"/* 2>/dev/null
 fi

 # "dir/." copies the directory contents and also works when it is empty.
 if ! cp -r "$TMP_DIR"/. "$CORPUS_DIR"/
 then
  echo "Error: could not copy merged corpus from $TMP_DIR" >&2
  trap - EXIT
  exit 1
 fi
 echo "Merging done !"
--- a/tests/fuzzing/synth_fuzzer.c
+++ b/tests/fuzzing/synth_fuzzer.c
@@ -0,0 +1,80 @@
 /*
 * Copyright (C) 2018 Sascha Brawer
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write see:
 *             <http://www.gnu.org/licenses/>.
 */

 #include "config.h"

 #include <libgen.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>

 #include <espeak-ng/espeak_ng.h>
 #define BOLDRED(x) "\x1b[31m\x1b[1m" x "\x1b[0m"

 static int initialized = 0;

 /*
  * Synthesis callback registered with espeak_SetSynthCallback().
  * It deliberately ignores every argument and always returns 0.
  */
 static int
 espeak_callback(short *wav, int num_samples, espeak_EVENT *event_list)
 {
 	/* Silence unused-parameter warnings. */
 	(void)wav, (void)num_samples, (void)event_list;
 	return 0;
 }

 /* See http://llvm.org/docs/LibFuzzer.html */
 extern int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);

 extern int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)

 {
 	int buflength = size+1;
 	if (!initialized)
 	{
 		int options = espeakINITIALIZE_DONT_EXIT;
 		espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, buflength, PATH_ESPEAK_DATA, options);
 		espeak_SetSynthCallback(espeak_callback);
 		const char *lang = getenv("FUZZ_VOICE");
 		if (lang == NULL)
 		{
 			fprintf(stderr, "\n" BOLDRED("[Please set up FUZZ_VOICE env var before starting fuzzer]") "\n\n");
 			exit(1);
 		}
 		if (espeak_SetVoiceByName(lang) != EE_OK)
 		{
 			fprintf(stderr, "\n" BOLDRED("[Please supply a valid voice in FUZZ_VOICE]") "\n\n");
 			exit(1);
 		}
 		initialized = 1;
 		fprintf(stderr, "VOICE FUZZED = %s\n", lang);
 	}
 	char *mutable_data = strndup((char *)data, size);
 	if (!mutable_data)
 	{
 		perror("malloc");
 		exit(1);
 	}
 	unsigned int position = 0, position_type = POS_CHARACTER, end_position = 0 , synth_flags = espeakCHARS_AUTO;
 	espeak_Synth(mutable_data, buflength, position, position_type, end_position,
 				 synth_flags, NULL, NULL);
 	free(mutable_data);

 	return 0;
 }
--- a/tests/non-executable-files-with-executable-bit.test
+++ b/tests/non-executable-files-with-executable-bit.test
@@ -8,6 +8,8 @@ find * -executable -type f | \
 	grep -vE ".*\.sh|tools/emoji" | # Ignore helper scripts \
 	grep -vE "src/ucd-tools/tools/(.*\.py|mkencodingtable)" | # Ignore ucd-tools helper scripts \
 	grep -vE "tests/.libs|src/ucd-tools/tests/print(ucd|c)data(_cpp)?" | # Ignore ucd-tools test programs \
 	grep -vE "tests/fuzzing/.*\.py" | #Ignore fuzzing python helper script
 	grep -vE "tests/fuzzing/.*\.sh" | #Ignore fuzzing shell helper script
 	tee tests/non-executable-files-with-executable-bit.check > /dev/null

 if [ -s tests/non-executable-files-with-executable-bit.check ] ; then