forked from OSchip/llvm-project
[libFuzzer] initial implementation of -data_flow_trace. It parses the data flow trace and prints the summary, but doesn't use the information in any other way yet
llvm-svn: 334058
This commit is contained in:
parent
f3914b74c1
commit
1fd005f552
|
@ -1,5 +1,6 @@
|
|||
set(LIBFUZZER_SOURCES
|
||||
FuzzerCrossOver.cpp
|
||||
FuzzerDataFlowTrace.cpp
|
||||
FuzzerDriver.cpp
|
||||
FuzzerExtFunctionsDlsym.cpp
|
||||
FuzzerExtFunctionsDlsymWin.cpp
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
//===- FuzzerDataFlowTrace.cpp - DataFlowTrace ---*- C++ -* ===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// fuzzer::DataFlowTrace
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "FuzzerDataFlowTrace.h"
|
||||
#include "FuzzerIO.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace fuzzer {
|
||||
|
||||
void DataFlowTrace::Init(const std::string &DirPath,
|
||||
const std::string &FocusFunction) {
|
||||
if (DirPath.empty()) return;
|
||||
const char *kFunctionsTxt = "functions.txt";
|
||||
Printf("INFO: DataFlowTrace: reading from '%s'\n", DirPath.c_str());
|
||||
Vector<SizedFile> Files;
|
||||
GetSizedFilesFromDir(DirPath, &Files);
|
||||
std::string L;
|
||||
|
||||
// Read functions.txt
|
||||
std::ifstream IF(DirPlusFile(DirPath, kFunctionsTxt));
|
||||
size_t FocusFuncIdx = SIZE_MAX;
|
||||
size_t NumFunctions = 0;
|
||||
while (std::getline(IF, L, '\n')) {
|
||||
NumFunctions++;
|
||||
if (FocusFunction == L)
|
||||
FocusFuncIdx = NumFunctions - 1;
|
||||
}
|
||||
if (!NumFunctions || FocusFuncIdx == SIZE_MAX || Files.size() <= 1)
|
||||
return;
|
||||
// Read traces.
|
||||
size_t NumTraceFiles = 0;
|
||||
size_t NumTracesWithFocusFunction = 0;
|
||||
for (auto &SF : Files) {
|
||||
auto Name = Basename(SF.File);
|
||||
if (Name == kFunctionsTxt) continue;
|
||||
auto ParseError = [&](const char *Err) {
|
||||
Printf("DataFlowTrace: parse error: %s\n File: %s\n Line: %s\n", Err,
|
||||
Name.c_str(), L.c_str());
|
||||
};
|
||||
NumTraceFiles++;
|
||||
// Printf("=== %s\n", Name.c_str());
|
||||
std::ifstream IF(SF.File);
|
||||
while (std::getline(IF, L, '\n')) {
|
||||
size_t SpacePos = L.find(' ');
|
||||
if (SpacePos == std::string::npos)
|
||||
return ParseError("no space in the trace line");
|
||||
if (L.empty() || L[0] != 'F')
|
||||
return ParseError("the trace line doesn't start with 'F'");
|
||||
size_t N = std::atol(L.c_str() + 1);
|
||||
if (N >= NumFunctions)
|
||||
return ParseError("N is greater than the number of functions");
|
||||
if (N == FocusFuncIdx) {
|
||||
NumTracesWithFocusFunction++;
|
||||
const char *Beg = L.c_str() + SpacePos + 1;
|
||||
const char *End = L.c_str() + L.size();
|
||||
assert(Beg < End);
|
||||
size_t Len = End - Beg;
|
||||
Vector<bool> V(Len);
|
||||
for (size_t I = 0; I < Len; I++) {
|
||||
if (Beg[I] != '0' && Beg[I] != '1')
|
||||
ParseError("the trace should contain only 0 or 1");
|
||||
V[I] = Beg[I] == '1';
|
||||
}
|
||||
// Print just a few small traces.
|
||||
if (NumTracesWithFocusFunction <= 3 && Len <= 16)
|
||||
Printf("%s => |%s|\n", Name.c_str(), L.c_str() + SpacePos + 1);
|
||||
break; // No need to parse the following lines.
|
||||
}
|
||||
}
|
||||
}
|
||||
assert(NumTraceFiles == Files.size() - 1);
|
||||
Printf("INFO: DataFlowTrace: %zd trace files, %zd functions, "
|
||||
"%zd traces with focus function\n",
|
||||
NumTraceFiles, NumFunctions, NumTracesWithFocusFunction);
|
||||
}
|
||||
|
||||
} // namespace fuzzer
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
//===- FuzzerDataFlowTrace.h - Internal header for the Fuzzer ---*- C++ -* ===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// fuzzer::DataFlowTrace; reads and handles a data-flow trace.
|
||||
//
|
||||
// A data flow trace is generated by e.g. dataflow/DataFlow.cpp
|
||||
// and is stored on disk in a separate directory.
|
||||
//
|
||||
// The trace dir contains a file 'functions.txt' which lists function names,
|
||||
// one per line, e.g.
|
||||
// ==> functions.txt <==
|
||||
// Func2
|
||||
// LLVMFuzzerTestOneInput
|
||||
// Func1
|
||||
//
|
||||
// All other files in the dir are the traces, see dataflow/DataFlow.cpp.
|
||||
// The name of the file is sha1 of the input used to generate the trace.
|
||||
//
|
||||
// Current status:
|
||||
// the data is parsed and the summary is printed, but the data is not yet
|
||||
// used in any other way.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_FUZZER_DATA_FLOW_TRACE
|
||||
#define LLVM_FUZZER_DATA_FLOW_TRACE
|
||||
|
||||
#include "FuzzerDefs.h"
|
||||
|
||||
namespace fuzzer {
|
||||
// Reads and handles a data-flow trace stored in a directory on disk.
struct DataFlowTrace {
  // Parses the trace found in the directory DirPath with respect to
  // FocusFunction, a function name as listed in that directory's
  // 'functions.txt'.
  void Init(const std::string &DirPath,
            const std::string &FocusFunction);
};
|
||||
} // namespace fuzzer
|
||||
|
||||
#endif // LLVM_FUZZER_DATA_FLOW_TRACE
|
|
@ -623,6 +623,8 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
|
|||
Options.ExitOnItem = Flags.exit_on_item;
|
||||
if (Flags.focus_function)
|
||||
Options.FocusFunction = Flags.focus_function;
|
||||
if (Flags.data_flow_trace)
|
||||
Options.DataFlowTrace = Flags.data_flow_trace;
|
||||
|
||||
unsigned Seed = Flags.seed;
|
||||
// Initialize Seed.
|
||||
|
|
|
@ -153,3 +153,5 @@ FUZZER_DEPRECATED_FLAG(use_equivalence_server)
|
|||
FUZZER_FLAG_INT(analyze_dict, 0, "Experimental")
|
||||
FUZZER_DEPRECATED_FLAG(use_clang_coverage)
|
||||
FUZZER_FLAG_INT(use_feature_frequency, 0, "Experimental/internal")
|
||||
|
||||
FUZZER_FLAG_STRING(data_flow_trace, "Experimental: use the data flow trace")
|
||||
|
|
|
@ -100,6 +100,14 @@ std::string DirPlusFile(const std::string &DirPath,
|
|||
return DirPath + GetSeparator() + FileName;
|
||||
}
|
||||
|
||||
// Returns the part of Path that follows the last occurrence of Separator.
// When Separator does not occur in Path, the whole Path is returned; when
// Path ends with Separator, the result is empty.
std::string Basename(const std::string &Path, char Separator) {
  const size_t LastSep = Path.rfind(Separator);
  const bool HasSeparator = LastSep != std::string::npos;
  if (HasSeparator) {
    assert(LastSep < Path.size());
    return Path.substr(LastSep + 1);
  }
  return Path;
}
|
||||
|
||||
void DupAndCloseStderr() {
|
||||
int OutputFd = DuplicateFile(2);
|
||||
if (OutputFd > 0) {
|
||||
|
|
|
@ -67,6 +67,8 @@ struct SizedFile {
|
|||
void GetSizedFilesFromDir(const std::string &Dir, Vector<SizedFile> *V);
|
||||
|
||||
char GetSeparator();
|
||||
// Similar to the basename utility: returns the file name w/o the dir prefix.
|
||||
std::string Basename(const std::string &Path, char Separator = GetSeparator());
|
||||
|
||||
FILE* OpenFile(int Fd, const char *Mode);
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#ifndef LLVM_FUZZER_INTERNAL_H
|
||||
#define LLVM_FUZZER_INTERNAL_H
|
||||
|
||||
#include "FuzzerDataFlowTrace.h"
|
||||
#include "FuzzerDefs.h"
|
||||
#include "FuzzerExtFunctions.h"
|
||||
#include "FuzzerInterface.h"
|
||||
|
@ -134,6 +135,7 @@ private:
|
|||
InputCorpus &Corpus;
|
||||
MutationDispatcher &MD;
|
||||
FuzzingOptions Options;
|
||||
DataFlowTrace DFT;
|
||||
|
||||
system_clock::time_point ProcessStartTime = system_clock::now();
|
||||
system_clock::time_point UnitStartTime, UnitStopTime;
|
||||
|
|
|
@ -160,6 +160,7 @@ Fuzzer::Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD,
|
|||
CurrentUnitSize = 0;
|
||||
memset(BaseSha1, 0, sizeof(BaseSha1));
|
||||
TPC.SetFocusFunction(Options.FocusFunction);
|
||||
DFT.Init(Options.DataFlowTrace, Options.FocusFunction);
|
||||
}
|
||||
|
||||
Fuzzer::~Fuzzer() {}
|
||||
|
|
|
@ -46,6 +46,7 @@ struct FuzzingOptions {
|
|||
std::string ExitOnSrcPos;
|
||||
std::string ExitOnItem;
|
||||
std::string FocusFunction;
|
||||
std::string DataFlowTrace;
|
||||
bool SaveArtifacts = true;
|
||||
bool PrintNEW = true; // Print a status line when new units are found;
|
||||
bool PrintNewCovPcs = false;
|
||||
|
|
|
@ -69,6 +69,7 @@ static const uintptr_t *FuncsBeg;
|
|||
static __thread size_t CurrentFunc;
|
||||
static dfsan_label *FuncLabels; // Array of NumFuncs elements.
|
||||
static char *PrintableStringForLabel; // InputLen + 2 bytes.
|
||||
static bool LabelSeen[1 << 8 * sizeof(dfsan_label)];
|
||||
|
||||
// Prints all instrumented functions.
|
||||
static int PrintFunctions() {
|
||||
|
@ -89,7 +90,11 @@ static int PrintFunctions() {
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void SetBytesForLabel(dfsan_label L, char *Bytes) {
|
||||
extern "C"
|
||||
void SetBytesForLabel(dfsan_label L, char *Bytes) {
|
||||
if (LabelSeen[L])
|
||||
return;
|
||||
LabelSeen[L] = true;
|
||||
assert(L);
|
||||
if (L <= InputLen + 1) {
|
||||
Bytes[L - 1] = '1';
|
||||
|
@ -103,6 +108,7 @@ static void SetBytesForLabel(dfsan_label L, char *Bytes) {
|
|||
// Renders label L into the shared PrintableStringForLabel buffer and returns
// that buffer. The buffer is first reset to InputLen + 1 '0' characters
// followed by a NUL terminator, and LabelSeen is cleared, before
// SetBytesForLabel marks the positions belonging to L.
static char *GetPrintableStringForLabel(dfsan_label L) {
  char *const Out = PrintableStringForLabel;
  const auto Terminator = InputLen + 1;
  // Start each rendering with a clean "visited" table.
  memset(LabelSeen, 0, sizeof(LabelSeen));
  memset(Out, '0', Terminator);
  Out[Terminator] = 0;
  SetBytesForLabel(L, Out);
  return Out;
}
|
||||
|
|
|
@ -11,9 +11,15 @@
|
|||
# the complete trace for all input bytes (running it on all bytes at once
|
||||
# may fail if DFSan runs out of labels).
|
||||
# Usage:
|
||||
# collect_data_flow.py BINARY INPUT [RESULT]
|
||||
#
|
||||
# # Collect dataflow for one input, store it in OUTPUT (default is stdout)
|
||||
# collect_data_flow.py BINARY INPUT [OUTPUT]
|
||||
#
|
||||
# # Collect dataflow for all inputs in CORPUS_DIR, store them in OUTPUT_DIR
|
||||
# collect_data_flow.py BINARY CORPUS_DIR OUTPUT_DIR
|
||||
#===------------------------------------------------------------------------===#
|
||||
import atexit
|
||||
import hashlib
|
||||
import sys
|
||||
import os
|
||||
import subprocess
|
||||
|
@ -26,9 +32,26 @@ def cleanup(d):
|
|||
print "removing: ", d
|
||||
shutil.rmtree(d)
|
||||
|
||||
def collect_dataflow_for_corpus(self, exe, corpus_dir, output_dir):
  """Collects data flow for every file under corpus_dir into output_dir.

  Args:
    self: path of this script; it is re-invoked once per corpus file.
    exe: the binary passed to each per-file invocation and, run without
      arguments, used to produce functions.txt.
    corpus_dir: directory that is walked recursively for input files.
    output_dir: directory to create; it must not exist yet.

  Each trace is written to output_dir/<sha1 of the input file contents>.
  Finally the stdout of running `exe` with no arguments is stored in
  output_dir/functions.txt.
  """
  # Single-string form prints the same text as the old print statement.
  print("Collecting dataflow for corpus: %s output_dir: %s" %
        (corpus_dir, output_dir))
  assert not os.path.exists(output_dir)
  os.mkdir(output_dir)
  for root, dirs, files in os.walk(corpus_dir):
    for f in files:
      path = os.path.join(root, f)
      # Hash the raw bytes and close the file promptly.
      with open(path, "rb") as corpus_file:
        sha1 = hashlib.sha1(corpus_file.read()).hexdigest()
      output = os.path.join(output_dir, sha1)
      subprocess.call([self, exe, path, output])
  # Close functions.txt as soon as the child process has finished writing.
  with open(os.path.join(output_dir, "functions.txt"), "w") as functions_txt:
    subprocess.call([exe], stdout=functions_txt)
|
||||
|
||||
|
||||
def main(argv):
|
||||
exe = argv[1]
|
||||
inp = argv[2]
|
||||
if os.path.isdir(inp):
|
||||
return collect_dataflow_for_corpus(argv[0], exe, inp, argv[3])
|
||||
size = os.path.getsize(inp)
|
||||
q = [[0, size]]
|
||||
tmpdir = tempfile.mkdtemp(prefix="libfuzzer-tmp-")
|
||||
|
|
|
@ -28,6 +28,14 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
|
|||
abort();
|
||||
}
|
||||
|
||||
// Checks Basename() on paths with, without, and ending in a separator.
TEST(Fuzzer, Basename) {
  const struct {
    const char *Path;
    const char *Expected;
  } Cases[] = {
      {"foo/bar", "bar"},
      {"bar", "bar"},
      {"/bar", "bar"},
      {"foo/x", "x"},
      {"foo/", ""},
  };
  for (const auto &C : Cases)
    EXPECT_EQ(Basename(C.Path), C.Expected);
}
|
||||
|
||||
TEST(Fuzzer, CrossOver) {
|
||||
std::unique_ptr<ExternalFunctions> t(new ExternalFunctions());
|
||||
fuzzer::EF = t.get();
|
||||
|
|
|
@ -8,12 +8,14 @@
|
|||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
|
||||
extern "C"
|
||||
__attribute__((noinline))
|
||||
static bool Func1(const uint8_t *Data, size_t Size) {
|
||||
bool Func1(const uint8_t *Data, size_t Size) {
|
||||
// assumes Size >= 5, doesn't check it.
|
||||
return Data[4] == 'M';
|
||||
}
|
||||
|
||||
extern "C"
|
||||
__attribute__((noinline))
|
||||
bool Func2(const uint8_t *Data, size_t Size) {
|
||||
return Size >= 6 && Data[5] == 'E';
|
||||
|
|
|
@ -5,6 +5,7 @@ REQUIRES: linux
|
|||
RUN: %no_fuzzer_cpp_compiler -c -fno-sanitize=all -fsanitize=dataflow %S/../../lib/fuzzer/dataflow/DataFlow.cpp -o %t-DataFlow.o
|
||||
RUN: %no_fuzzer_cpp_compiler -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp %S/ThreeFunctionsTest.cpp %t-DataFlow.o -o %t-ThreeFunctionsTestDF
|
||||
RUN: %no_fuzzer_cpp_compiler -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp %S/ExplodeDFSanLabelsTest.cpp %t-DataFlow.o -o %t-ExplodeDFSanLabelsTestDF
|
||||
RUN: %cpp_compiler %S/ThreeFunctionsTest.cpp -o %t-ThreeFunctionsTest
|
||||
|
||||
# Dump the function list.
|
||||
RUN: %t-ThreeFunctionsTestDF 2>&1 | FileCheck %s --check-prefix=FUNC_LIST
|
||||
|
@ -70,3 +71,13 @@ RUN: %t-ExplodeDFSanLabelsTestDF 2 4 %t/IN/1234567890123456
|
|||
RUN: %t-ExplodeDFSanLabelsTestDF 4 6 %t/IN/1234567890123456
|
||||
# Or we can use collect_data_flow
|
||||
RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ExplodeDFSanLabelsTestDF %t/IN/1234567890123456
|
||||
|
||||
# Test that we can run collect_data_flow on the entire corpus dir
|
||||
RUN: rm -rf %t/OUT
|
||||
RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ThreeFunctionsTestDF %t/IN %t/OUT
|
||||
RUN: %t-ThreeFunctionsTest -data_flow_trace=%t/OUT -runs=0 -focus_function=Func2 2>&1 | FileCheck %s --check-prefix=USE_DATA_FLOW_TRACE
|
||||
USE_DATA_FLOW_TRACE: INFO: Focus function is set to 'Func2'
|
||||
USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: reading from {{.*}}/OUT
|
||||
USE_DATA_FLOW_TRACE-DAG: a8eefe2fd5d6b32028f355fafa3e739a6bf5edc => |000001|
|
||||
USE_DATA_FLOW_TRACE-DAG: d28cb407e8e1a702c72d25473f0553d3ec172262 => |0000011|
|
||||
USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: 6 trace files, 3 functions, 2 traces with focus function
|
||||
|
|
Loading…
Reference in New Issue