From 1fd005f552595ceb2a10f2deacc6b64a50019afb Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Wed, 6 Jun 2018 01:23:29 +0000 Subject: [PATCH] [libFuzzer] initial implementation of -data_flow_trace. It parses the data flow trace and prints the summary, but doesn't use the information in any other way yet llvm-svn: 334058 --- compiler-rt/lib/fuzzer/CMakeLists.txt | 1 + .../lib/fuzzer/FuzzerDataFlowTrace.cpp | 90 +++++++++++++++++++ compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h | 40 +++++++++ compiler-rt/lib/fuzzer/FuzzerDriver.cpp | 2 + compiler-rt/lib/fuzzer/FuzzerFlags.def | 2 + compiler-rt/lib/fuzzer/FuzzerIO.cpp | 8 ++ compiler-rt/lib/fuzzer/FuzzerIO.h | 2 + compiler-rt/lib/fuzzer/FuzzerInternal.h | 2 + compiler-rt/lib/fuzzer/FuzzerLoop.cpp | 1 + compiler-rt/lib/fuzzer/FuzzerOptions.h | 1 + compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp | 8 +- .../lib/fuzzer/scripts/collect_data_flow.py | 25 +++++- .../lib/fuzzer/tests/FuzzerUnittest.cpp | 8 ++ .../test/fuzzer/ThreeFunctionsTest.cpp | 4 +- compiler-rt/test/fuzzer/dataflow.test | 11 +++ 15 files changed, 202 insertions(+), 3 deletions(-) create mode 100644 compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp create mode 100644 compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h diff --git a/compiler-rt/lib/fuzzer/CMakeLists.txt b/compiler-rt/lib/fuzzer/CMakeLists.txt index 7e696de12f8f..7ec0dd551d04 100644 --- a/compiler-rt/lib/fuzzer/CMakeLists.txt +++ b/compiler-rt/lib/fuzzer/CMakeLists.txt @@ -1,5 +1,6 @@ set(LIBFUZZER_SOURCES FuzzerCrossOver.cpp + FuzzerDataFlowTrace.cpp FuzzerDriver.cpp FuzzerExtFunctionsDlsym.cpp FuzzerExtFunctionsDlsymWin.cpp diff --git a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp new file mode 100644 index 000000000000..69efd6f38b52 --- /dev/null +++ b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp @@ -0,0 +1,90 @@ +//===- FuzzerDataFlowTrace.cpp - DataFlowTrace ---*- C++ -* ===// +// +// The LLVM Compiler Infrastructure +// +// This file is 
distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// fuzzer::DataFlowTrace +//===----------------------------------------------------------------------===// + +#include "FuzzerDataFlowTrace.h" +#include "FuzzerIO.h" + +#include +#include +#include +#include + +namespace fuzzer { + +void DataFlowTrace::Init(const std::string &DirPath, + const std::string &FocusFunction) { + if (DirPath.empty()) return; + const char *kFunctionsTxt = "functions.txt"; + Printf("INFO: DataFlowTrace: reading from '%s'\n", DirPath.c_str()); + Vector Files; + GetSizedFilesFromDir(DirPath, &Files); + std::string L; + + // Read functions.txt + std::ifstream IF(DirPlusFile(DirPath, kFunctionsTxt)); + size_t FocusFuncIdx = SIZE_MAX; + size_t NumFunctions = 0; + while (std::getline(IF, L, '\n')) { + NumFunctions++; + if (FocusFunction == L) + FocusFuncIdx = NumFunctions - 1; + } + if (!NumFunctions || FocusFuncIdx == SIZE_MAX || Files.size() <= 1) + return; + // Read traces. 
+ size_t NumTraceFiles = 0; + size_t NumTracesWithFocusFunction = 0; + for (auto &SF : Files) { + auto Name = Basename(SF.File); + if (Name == kFunctionsTxt) continue; + auto ParseError = [&](const char *Err) { + Printf("DataFlowTrace: parse error: %s\n File: %s\n Line: %s\n", Err, + Name.c_str(), L.c_str()); + }; + NumTraceFiles++; + // Printf("=== %s\n", Name.c_str()); + std::ifstream IF(SF.File); + while (std::getline(IF, L, '\n')) { + size_t SpacePos = L.find(' '); + if (SpacePos == std::string::npos) + return ParseError("no space in the trace line"); + if (L.empty() || L[0] != 'F') + return ParseError("the trace line doesn't start with 'F'"); + size_t N = std::atol(L.c_str() + 1); + if (N >= NumFunctions) + return ParseError("N is greater than the number of functions"); + if (N == FocusFuncIdx) { + NumTracesWithFocusFunction++; + const char *Beg = L.c_str() + SpacePos + 1; + const char *End = L.c_str() + L.size(); + assert(Beg < End); + size_t Len = End - Beg; + Vector V(Len); + for (size_t I = 0; I < Len; I++) { + if (Beg[I] != '0' && Beg[I] != '1') + ParseError("the trace should contain only 0 or 1"); + V[I] = Beg[I] == '1'; + } + // Print just a few small traces. + if (NumTracesWithFocusFunction <= 3 && Len <= 16) + Printf("%s => |%s|\n", Name.c_str(), L.c_str() + SpacePos + 1); + break; // No need to parse the following lines. 
+ } + } + } + assert(NumTraceFiles == Files.size() - 1); + Printf("INFO: DataFlowTrace: %zd trace files, %zd functions, " + "%zd traces with focus function\n", + NumTraceFiles, NumFunctions, NumTracesWithFocusFunction); +} + +} // namespace fuzzer + diff --git a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h new file mode 100644 index 000000000000..2b7b71fdbfa7 --- /dev/null +++ b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h @@ -0,0 +1,40 @@ +//===- FuzzerDataFlowTrace.h - Internal header for the Fuzzer ---*- C++ -* ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// fuzzer::DataFlowTrace; reads and handles a data-flow trace. +// +// A data flow trace is generated by e.g. dataflow/DataFlow.cpp +// and is stored on disk in a separate directory. +// +// The trace dir contains a file 'functions.txt' which lists function names, +// one per line, e.g. +// ==> functions.txt <== +// Func2 +// LLVMFuzzerTestOneInput +// Func1 +// +// All other files in the dir are the traces, see dataflow/DataFlow.cpp. +// The name of the file is sha1 of the input used to generate the trace. +// +// Current status: +// the data is parsed and the summary is printed, but the data is not yet +// used in any other way. 
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_DATA_FLOW_TRACE +#define LLVM_FUZZER_DATA_FLOW_TRACE + +#include "FuzzerDefs.h" + +namespace fuzzer { +struct DataFlowTrace { + void Init(const std::string &DirPath, const std::string &FocusFunction); +}; +} // namespace fuzzer + +#endif // LLVM_FUZZER_DATA_FLOW_TRACE diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp index dfb3d492ced6..d7b95734991c 100644 --- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -623,6 +623,8 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Options.ExitOnItem = Flags.exit_on_item; if (Flags.focus_function) Options.FocusFunction = Flags.focus_function; + if (Flags.data_flow_trace) + Options.DataFlowTrace = Flags.data_flow_trace; unsigned Seed = Flags.seed; // Initialize Seed. diff --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def index 139e6187f3ad..5be6d2641ba2 100644 --- a/compiler-rt/lib/fuzzer/FuzzerFlags.def +++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def @@ -153,3 +153,5 @@ FUZZER_DEPRECATED_FLAG(use_equivalence_server) FUZZER_FLAG_INT(analyze_dict, 0, "Experimental") FUZZER_DEPRECATED_FLAG(use_clang_coverage) FUZZER_FLAG_INT(use_feature_frequency, 0, "Experimental/internal") + +FUZZER_FLAG_STRING(data_flow_trace, "Experimental: use the data flow trace") diff --git a/compiler-rt/lib/fuzzer/FuzzerIO.cpp b/compiler-rt/lib/fuzzer/FuzzerIO.cpp index dac5ec658f1c..f3ead0ec5357 100644 --- a/compiler-rt/lib/fuzzer/FuzzerIO.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerIO.cpp @@ -100,6 +100,14 @@ std::string DirPlusFile(const std::string &DirPath, return DirPath + GetSeparator() + FileName; } +std::string Basename(const std::string &Path, char Separator) { + size_t Pos = Path.rfind(Separator); + if (Pos == std::string::npos) + return Path; + assert(Pos < Path.size()); + return Path.substr(Pos + 1); +} 
+ void DupAndCloseStderr() { int OutputFd = DuplicateFile(2); if (OutputFd > 0) { diff --git a/compiler-rt/lib/fuzzer/FuzzerIO.h b/compiler-rt/lib/fuzzer/FuzzerIO.h index ea9f0d5a6703..6d7757435b7b 100644 --- a/compiler-rt/lib/fuzzer/FuzzerIO.h +++ b/compiler-rt/lib/fuzzer/FuzzerIO.h @@ -67,6 +67,8 @@ struct SizedFile { void GetSizedFilesFromDir(const std::string &Dir, Vector *V); char GetSeparator(); +// Similar to the basename utility: returns the file name w/o the dir prefix. +std::string Basename(const std::string &Path, char Separator = GetSeparator()); FILE* OpenFile(int Fd, const char *Mode); diff --git a/compiler-rt/lib/fuzzer/FuzzerInternal.h b/compiler-rt/lib/fuzzer/FuzzerInternal.h index 2b2638f1f8f2..ec098a78f273 100644 --- a/compiler-rt/lib/fuzzer/FuzzerInternal.h +++ b/compiler-rt/lib/fuzzer/FuzzerInternal.h @@ -12,6 +12,7 @@ #ifndef LLVM_FUZZER_INTERNAL_H #define LLVM_FUZZER_INTERNAL_H +#include "FuzzerDataFlowTrace.h" #include "FuzzerDefs.h" #include "FuzzerExtFunctions.h" #include "FuzzerInterface.h" @@ -134,6 +135,7 @@ private: InputCorpus &Corpus; MutationDispatcher &MD; FuzzingOptions Options; + DataFlowTrace DFT; system_clock::time_point ProcessStartTime = system_clock::now(); system_clock::time_point UnitStartTime, UnitStopTime; diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp index 9c19ba913205..27bd5ee65516 100644 --- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp @@ -160,6 +160,7 @@ Fuzzer::Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD, CurrentUnitSize = 0; memset(BaseSha1, 0, sizeof(BaseSha1)); TPC.SetFocusFunction(Options.FocusFunction); + DFT.Init(Options.DataFlowTrace, Options.FocusFunction); } Fuzzer::~Fuzzer() {} diff --git a/compiler-rt/lib/fuzzer/FuzzerOptions.h b/compiler-rt/lib/fuzzer/FuzzerOptions.h index 946f0b9d60b0..7a52d3624514 100644 --- a/compiler-rt/lib/fuzzer/FuzzerOptions.h +++ b/compiler-rt/lib/fuzzer/FuzzerOptions.h @@ 
-46,6 +46,7 @@ struct FuzzingOptions { std::string ExitOnSrcPos; std::string ExitOnItem; std::string FocusFunction; + std::string DataFlowTrace; bool SaveArtifacts = true; bool PrintNEW = true; // Print a status line when new units are found; bool PrintNewCovPcs = false; diff --git a/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp b/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp index 99863074d726..a79c796ac456 100644 --- a/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp +++ b/compiler-rt/lib/fuzzer/dataflow/DataFlow.cpp @@ -69,6 +69,7 @@ static const uintptr_t *FuncsBeg; static __thread size_t CurrentFunc; static dfsan_label *FuncLabels; // Array of NumFuncs elements. static char *PrintableStringForLabel; // InputLen + 2 bytes. +static bool LabelSeen[1 << 8 * sizeof(dfsan_label)]; // Prints all instrumented functions. static int PrintFunctions() { @@ -89,7 +90,11 @@ static int PrintFunctions() { return 0; } -static void SetBytesForLabel(dfsan_label L, char *Bytes) { +extern "C" +void SetBytesForLabel(dfsan_label L, char *Bytes) { + if (LabelSeen[L]) + return; + LabelSeen[L] = true; assert(L); if (L <= InputLen + 1) { Bytes[L - 1] = '1'; @@ -103,6 +108,7 @@ static void SetBytesForLabel(dfsan_label L, char *Bytes) { static char *GetPrintableStringForLabel(dfsan_label L) { memset(PrintableStringForLabel, '0', InputLen + 1); PrintableStringForLabel[InputLen + 1] = 0; + memset(LabelSeen, 0, sizeof(LabelSeen)); SetBytesForLabel(L, PrintableStringForLabel); return PrintableStringForLabel; } diff --git a/compiler-rt/lib/fuzzer/scripts/collect_data_flow.py b/compiler-rt/lib/fuzzer/scripts/collect_data_flow.py index d13f6dcc4110..c3faf71c0af7 100755 --- a/compiler-rt/lib/fuzzer/scripts/collect_data_flow.py +++ b/compiler-rt/lib/fuzzer/scripts/collect_data_flow.py @@ -11,9 +11,15 @@ # the complete trace for all input bytes (running it on all bytes at once # may fail if DFSan runs out of labels). 
# Usage: -# collect_data_flow.py BINARY INPUT [RESULT] +# +# # Collect dataflow for one input, store it in OUTPUT (default is stdout) +# collect_data_flow.py BINARY INPUT [OUTPUT] +# +# # Collect dataflow for all inputs in CORPUS_DIR, store them in OUTPUT_DIR +# collect_data_flow.py BINARY CORPUS_DIR OUTPUT_DIR #===------------------------------------------------------------------------===# import atexit +import hashlib import sys import os import subprocess @@ -26,9 +32,26 @@ def cleanup(d): print "removing: ", d shutil.rmtree(d) +def collect_dataflow_for_corpus(self, exe, corpus_dir, output_dir): + print "Collecting dataflow for corpus:", corpus_dir, \ + "output_dir:", output_dir + assert not os.path.exists(output_dir) + os.mkdir(output_dir) + for root, dirs, files in os.walk(corpus_dir): + for f in files: + path = os.path.join(root, f) + sha1 = hashlib.sha1(open(path).read()).hexdigest() + output = os.path.join(output_dir, sha1) + subprocess.call([self, exe, path, output]) + functions_txt = open(os.path.join(output_dir, "functions.txt"), "w") + subprocess.call([exe], stdout=functions_txt) + + def main(argv): exe = argv[1] inp = argv[2] + if os.path.isdir(inp): + return collect_dataflow_for_corpus(argv[0], exe, inp, argv[3]) size = os.path.getsize(inp) q = [[0, size]] tmpdir = tempfile.mkdtemp(prefix="libfuzzer-tmp-") diff --git a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp index a38a45344e9c..0b8673876a92 100644 --- a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp +++ b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp @@ -28,6 +28,14 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { abort(); } +TEST(Fuzzer, Basename) { + EXPECT_EQ(Basename("foo/bar"), "bar"); + EXPECT_EQ(Basename("bar"), "bar"); + EXPECT_EQ(Basename("/bar"), "bar"); + EXPECT_EQ(Basename("foo/x"), "x"); + EXPECT_EQ(Basename("foo/"), ""); +} + TEST(Fuzzer, CrossOver) { std::unique_ptr t(new ExternalFunctions()); 
fuzzer::EF = t.get(); diff --git a/compiler-rt/test/fuzzer/ThreeFunctionsTest.cpp b/compiler-rt/test/fuzzer/ThreeFunctionsTest.cpp index 0ff682abc953..1278cb05633d 100644 --- a/compiler-rt/test/fuzzer/ThreeFunctionsTest.cpp +++ b/compiler-rt/test/fuzzer/ThreeFunctionsTest.cpp @@ -8,12 +8,14 @@ #include #include +extern "C" __attribute__((noinline)) -static bool Func1(const uint8_t *Data, size_t Size) { +bool Func1(const uint8_t *Data, size_t Size) { // assumes Size >= 5, doesn't check it. return Data[4] == 'M'; } +extern "C" __attribute__((noinline)) bool Func2(const uint8_t *Data, size_t Size) { return Size >= 6 && Data[5] == 'E'; diff --git a/compiler-rt/test/fuzzer/dataflow.test b/compiler-rt/test/fuzzer/dataflow.test index 7162b06f6d25..7b85c6f9e454 100644 --- a/compiler-rt/test/fuzzer/dataflow.test +++ b/compiler-rt/test/fuzzer/dataflow.test @@ -5,6 +5,7 @@ REQUIRES: linux RUN: %no_fuzzer_cpp_compiler -c -fno-sanitize=all -fsanitize=dataflow %S/../../lib/fuzzer/dataflow/DataFlow.cpp -o %t-DataFlow.o RUN: %no_fuzzer_cpp_compiler -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp %S/ThreeFunctionsTest.cpp %t-DataFlow.o -o %t-ThreeFunctionsTestDF RUN: %no_fuzzer_cpp_compiler -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp %S/ExplodeDFSanLabelsTest.cpp %t-DataFlow.o -o %t-ExplodeDFSanLabelsTestDF +RUN: %cpp_compiler %S/ThreeFunctionsTest.cpp -o %t-ThreeFunctionsTest # Dump the function list. 
RUN: %t-ThreeFunctionsTestDF 2>&1 | FileCheck %s --check-prefix=FUNC_LIST @@ -70,3 +71,13 @@ RUN: %t-ExplodeDFSanLabelsTestDF 2 4 %t/IN/1234567890123456 RUN: %t-ExplodeDFSanLabelsTestDF 4 6 %t/IN/1234567890123456 # Or we can use collect_data_flow RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ExplodeDFSanLabelsTestDF %t/IN/1234567890123456 + +# Test that we can run collect_data_flow on the entire corpus dir +RUN: rm -rf %t/OUT +RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ThreeFunctionsTestDF %t/IN %t/OUT +RUN: %t-ThreeFunctionsTest -data_flow_trace=%t/OUT -runs=0 -focus_function=Func2 2>&1 | FileCheck %s --check-prefix=USE_DATA_FLOW_TRACE +USE_DATA_FLOW_TRACE: INFO: Focus function is set to 'Func2' +USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: reading from {{.*}}/OUT +USE_DATA_FLOW_TRACE-DAG: a8eefe2fd5d6b32028f355fafa3e739a6bf5edc => |000001| +USE_DATA_FLOW_TRACE-DAG: d28cb407e8e1a702c72d25473f0553d3ec172262 => |0000011| +USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: 6 trace files, 3 functions, 2 traces with focus function