[libFuzzer] split DataFlow.cpp into two .cpp files, one of which can be compiled w/o dfsan to speed things up (~25% speedup)

llvm-svn: 364002
This commit is contained in:
Kostya Serebryany 2019-06-21 01:39:35 +00:00
parent bc0d08e0ee
commit 679669a77e
6 changed files with 153 additions and 103 deletions

View File

@ -17,10 +17,11 @@
// and also provides basic-block coverage for every input.
//
// Build:
// 1. Compile this file with -fsanitize=dataflow
// 2. Build the fuzz target with -g -fsanitize=dataflow
// 1. Compile this file (DataFlow.cpp) with -fsanitize=dataflow and -O2.
// 2. Compile DataFlowCallbacks.cpp with -O2 -fPIC.
// 3. Build the fuzz target with -g -fsanitize=dataflow
// -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp
// 3. Link those together with -fsanitize=dataflow
// 4. Link those together with -fsanitize=dataflow
//
// -fsanitize-coverage=trace-cmp inserts callbacks around every comparison
// instruction, DFSan modifies the calls to pass the data flow labels.
@ -63,32 +64,24 @@
#include <execinfo.h> // backtrace_symbols_fd
#include <sanitizer/dfsan_interface.h>
#include "DataFlow.h"
extern "C" {
extern int LLVMFuzzerTestOneInput(const unsigned char *Data, size_t Size);
__attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv);
} // extern "C"
CallbackData __dft;
static size_t InputLen;
static size_t NumIterations;
static size_t NumFuncs, NumGuards;
static uint32_t *GuardsBeg, *GuardsEnd;
static const uintptr_t *PCsBeg, *PCsEnd;
static __thread size_t CurrentFunc, CurrentIteration;
static dfsan_label **FuncLabels; // NumFuncs x NumIterations.
static bool *BBExecuted; // Array of NumGuards elements.
enum {
PCFLAG_FUNC_ENTRY = 1,
};
const int kNumLabels = 16;
static dfsan_label **FuncLabelsPerIter; // NumIterations x NumFuncs;
static inline bool BlockIsEntry(size_t BlockIdx) {
return PCsBeg[BlockIdx * 2 + 1] & PCFLAG_FUNC_ENTRY;
return __dft.PCsBeg[BlockIdx * 2 + 1] & PCFLAG_FUNC_ENTRY;
}
const int kNumLabels = 16;
// Prints all instrumented functions.
static int PrintFunctions() {
// We don't have the symbolizer integrated with dfsan yet.
@ -101,8 +94,8 @@ static int PrintFunctions() {
"| sed 's/dfs\\$//g' "
"| c++filt",
"w");
for (size_t I = 0; I < NumGuards; I++) {
uintptr_t PC = PCsBeg[I * 2];
for (size_t I = 0; I < __dft.NumGuards; I++) {
uintptr_t PC = __dft.PCsBeg[I * 2];
if (!BlockIsEntry(I)) continue;
void *const Buf[1] = {(void*)PC};
backtrace_symbols_fd(Buf, 1, fileno(Pipe));
@ -121,10 +114,10 @@ static void PrintBinary(FILE *Out, dfsan_label L, size_t Len) {
}
static void PrintDataFlow(FILE *Out) {
for (size_t Func = 0; Func < NumFuncs; Func++) {
for (size_t Func = 0; Func < __dft.NumFuncs; Func++) {
bool HasAny = false;
for (size_t Iter = 0; Iter < NumIterations; Iter++)
if (FuncLabels[Func][Iter])
if (FuncLabelsPerIter[Iter][Func])
HasAny = true;
if (!HasAny)
continue;
@ -133,7 +126,7 @@ static void PrintDataFlow(FILE *Out) {
if (auto Tail = InputLen % kNumLabels)
LenOfLastIteration = Tail;
for (size_t Iter = 0; Iter < NumIterations; Iter++)
PrintBinary(Out, FuncLabels[Func][Iter],
PrintBinary(Out, FuncLabelsPerIter[Iter][Func],
Iter == NumIterations - 1 ? LenOfLastIteration : kNumLabels);
fprintf(Out, "\n");
}
@ -143,16 +136,16 @@ static void PrintCoverage(FILE *Out) {
ssize_t CurrentFuncGuard = -1;
ssize_t CurrentFuncNum = -1;
ssize_t NumBlocksInCurrentFunc = -1;
for (size_t FuncBeg = 0; FuncBeg < NumGuards;) {
for (size_t FuncBeg = 0; FuncBeg < __dft.NumGuards;) {
CurrentFuncNum++;
assert(BlockIsEntry(FuncBeg));
size_t FuncEnd = FuncBeg + 1;
for (; FuncEnd < NumGuards && !BlockIsEntry(FuncEnd); FuncEnd++)
for (; FuncEnd < __dft.NumGuards && !BlockIsEntry(FuncEnd); FuncEnd++)
;
if (BBExecuted[FuncBeg]) {
if (__dft.BBExecuted[FuncBeg]) {
fprintf(Out, "C%zd", CurrentFuncNum);
for (size_t I = FuncBeg + 1; I < FuncEnd; I++)
if (BBExecuted[I])
if (__dft.BBExecuted[I])
fprintf(Out, " %zd", I - FuncBeg);
fprintf(Out, " %zd\n", FuncEnd - FuncBeg);
}
@ -180,19 +173,19 @@ int main(int argc, char **argv) {
fclose(In);
NumIterations = (NumBytesRead + kNumLabels - 1) / kNumLabels;
FuncLabels = (dfsan_label**)calloc(NumFuncs, sizeof(dfsan_label*));
for (size_t Func = 0; Func < NumFuncs; Func++)
FuncLabels[Func] =
(dfsan_label *)calloc(NumIterations, sizeof(dfsan_label));
FuncLabelsPerIter =
(dfsan_label **)calloc(NumIterations, sizeof(dfsan_label *));
for (size_t Iter = 0; Iter < NumIterations; Iter++)
FuncLabelsPerIter[Iter] =
(dfsan_label *)calloc(__dft.NumFuncs, sizeof(dfsan_label));
for (CurrentIteration = 0; CurrentIteration < NumIterations;
CurrentIteration++) {
fprintf(stderr, "INFO: running '%s' %zd/%zd\n", Input, CurrentIteration,
NumIterations);
for (size_t Iter = 0; Iter < NumIterations; Iter++) {
fprintf(stderr, "INFO: running '%s' %zd/%zd\n", Input, Iter, NumIterations);
dfsan_flush();
dfsan_set_label(0, Buf, InputLen);
__dft.FuncLabels = FuncLabelsPerIter[Iter];
size_t BaseIdx = CurrentIteration * kNumLabels;
size_t BaseIdx = Iter * kNumLabels;
size_t LastIdx = BaseIdx + kNumLabels < NumBytesRead ? BaseIdx + kNumLabels
: NumBytesRead;
assert(BaseIdx < LastIdx);
@ -210,67 +203,3 @@ int main(int argc, char **argv) {
PrintCoverage(Out);
if (!OutIsStdout) fclose(Out);
}
extern "C" {
void __sanitizer_cov_trace_pc_guard_init(uint32_t *start,
uint32_t *stop) {
assert(NumFuncs == 0 && "This tool does not support DSOs");
assert(start < stop && "The code is not instrumented for coverage");
if (start == stop || *start) return; // Initialize only once.
GuardsBeg = start;
GuardsEnd = stop;
}
void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
const uintptr_t *pcs_end) {
if (NumGuards) return; // Initialize only once.
NumGuards = GuardsEnd - GuardsBeg;
PCsBeg = pcs_beg;
PCsEnd = pcs_end;
assert(NumGuards == (PCsEnd - PCsBeg) / 2);
for (size_t i = 0; i < NumGuards; i++) {
if (BlockIsEntry(i)) {
NumFuncs++;
GuardsBeg[i] = NumFuncs;
}
}
BBExecuted = (bool*)calloc(NumGuards, sizeof(bool));
fprintf(stderr, "INFO: %zd instrumented function(s) observed "
"and %zd basic blocks\n", NumFuncs, NumGuards);
}
void __sanitizer_cov_trace_pc_indir(uint64_t x){} // unused.
void __sanitizer_cov_trace_pc_guard(uint32_t *guard) {
size_t GuardIdx = guard - GuardsBeg;
assert(GuardIdx < NumGuards);
BBExecuted[GuardIdx] = true;
if (!*guard) return; // not a function entry.
uint32_t FuncNum = *guard - 1; // Guards start from 1.
assert(FuncNum < NumFuncs);
CurrentFunc = FuncNum;
}
void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases,
dfsan_label L1, dfsan_label UnusedL) {
assert(CurrentFunc < NumFuncs);
FuncLabels[CurrentFunc][CurrentIteration] |= L1;
}
#define HOOK(Name, Type) \
void Name(Type Arg1, Type Arg2, dfsan_label L1, dfsan_label L2) { \
assert(CurrentFunc < NumFuncs); \
FuncLabels[CurrentFunc][CurrentIteration] |= L1 | L2; \
}
HOOK(__dfsw___sanitizer_cov_trace_const_cmp1, uint8_t)
HOOK(__dfsw___sanitizer_cov_trace_const_cmp2, uint16_t)
HOOK(__dfsw___sanitizer_cov_trace_const_cmp4, uint32_t)
HOOK(__dfsw___sanitizer_cov_trace_const_cmp8, uint64_t)
HOOK(__dfsw___sanitizer_cov_trace_cmp1, uint8_t)
HOOK(__dfsw___sanitizer_cov_trace_cmp2, uint16_t)
HOOK(__dfsw___sanitizer_cov_trace_cmp4, uint32_t)
HOOK(__dfsw___sanitizer_cov_trace_cmp8, uint64_t)
} // extern "C"

View File

@ -0,0 +1,32 @@
/*===- DataFlow.h - a standalone DataFlow trace -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Internal header file to connect DataFlow.cpp and DataFlowCallbacks.cpp.
//===----------------------------------------------------------------------===*/
#ifndef __LIBFUZZER_DATAFLOW_H
#define __LIBFUZZER_DATAFLOW_H
#include <cstddef>
#include <cstdint>
#include <sanitizer/dfsan_interface.h>
// This data is shared between DataFlowCallbacks.cpp and DataFlow.cpp.
struct CallbackData {
size_t NumFuncs, NumGuards;
const uintptr_t *PCsBeg, *PCsEnd;
dfsan_label *FuncLabels; // Array of NumFuncs elements.
bool *BBExecuted; // Array of NumGuards elements.
};
extern CallbackData __dft;
enum {
PCFLAG_FUNC_ENTRY = 1,
};
#endif // __LIBFUZZER_DATAFLOW_H

View File

@ -0,0 +1,86 @@
/*===- DataFlowCallbacks.cpp - a standalone DataFlow trace -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Instrumentation callbacks for DataFlow.cpp.
// These functions should not be instrumented by DFSan, so we
// keep them in a separate file and compile it w/o DFSan.
//===----------------------------------------------------------------------===*/
#include "DataFlow.h"
#include <cassert>
#include <cstdio>
#include <cstdlib>
static __thread size_t CurrentFunc;
static uint32_t *GuardsBeg, *GuardsEnd;
static inline bool BlockIsEntry(size_t BlockIdx) {
return __dft.PCsBeg[BlockIdx * 2 + 1] & PCFLAG_FUNC_ENTRY;
}
extern "C" {
void __sanitizer_cov_trace_pc_guard_init(uint32_t *start,
uint32_t *stop) {
assert(__dft.NumFuncs == 0 && "This tool does not support DSOs");
assert(start < stop && "The code is not instrumented for coverage");
if (start == stop || *start) return; // Initialize only once.
GuardsBeg = start;
GuardsEnd = stop;
}
void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
const uintptr_t *pcs_end) {
if (__dft.NumGuards) return; // Initialize only once.
__dft.NumGuards = GuardsEnd - GuardsBeg;
__dft.PCsBeg = pcs_beg;
__dft.PCsEnd = pcs_end;
assert(__dft.NumGuards == (__dft.PCsEnd - __dft.PCsBeg) / 2);
for (size_t i = 0; i < __dft.NumGuards; i++) {
if (BlockIsEntry(i)) {
__dft.NumFuncs++;
GuardsBeg[i] = __dft.NumFuncs;
}
}
__dft.BBExecuted = (bool*)calloc(__dft.NumGuards, sizeof(bool));
fprintf(stderr, "INFO: %zd instrumented function(s) observed "
"and %zd basic blocks\n", __dft.NumFuncs, __dft.NumGuards);
}
void __sanitizer_cov_trace_pc_indir(uint64_t x){} // unused.
void __sanitizer_cov_trace_pc_guard(uint32_t *guard) {
size_t GuardIdx = guard - GuardsBeg;
// assert(GuardIdx < __dft.NumGuards);
__dft.BBExecuted[GuardIdx] = true;
if (!*guard) return; // not a function entry.
uint32_t FuncNum = *guard - 1; // Guards start from 1.
// assert(FuncNum < __dft.NumFuncs);
CurrentFunc = FuncNum;
}
void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases,
dfsan_label L1, dfsan_label UnusedL) {
assert(CurrentFunc < __dft.NumFuncs);
__dft.FuncLabels[CurrentFunc] |= L1;
}
#define HOOK(Name, Type) \
void Name(Type Arg1, Type Arg2, dfsan_label L1, dfsan_label L2) { \
__dft.FuncLabels[CurrentFunc] |= L1 | L2; \
}
//assert(CurrentFunc < __dft.NumFuncs);
HOOK(__dfsw___sanitizer_cov_trace_const_cmp1, uint8_t)
HOOK(__dfsw___sanitizer_cov_trace_const_cmp2, uint16_t)
HOOK(__dfsw___sanitizer_cov_trace_const_cmp4, uint32_t)
HOOK(__dfsw___sanitizer_cov_trace_const_cmp8, uint64_t)
HOOK(__dfsw___sanitizer_cov_trace_cmp1, uint8_t)
HOOK(__dfsw___sanitizer_cov_trace_cmp2, uint16_t)
HOOK(__dfsw___sanitizer_cov_trace_cmp4, uint32_t)
HOOK(__dfsw___sanitizer_cov_trace_cmp8, uint64_t)
} // extern "C"

View File

@ -3,8 +3,9 @@ REQUIRES: linux, x86_64
# Build the tracer and the test.
RUN: %no_fuzzer_cpp_compiler -c -fno-sanitize=all -fsanitize=dataflow %S/../../lib/fuzzer/dataflow/DataFlow.cpp -o %t-DataFlow.o
RUN: %no_fuzzer_cpp_compiler -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp %S/ThreeFunctionsTest.cpp %t-DataFlow.o -o %t-ThreeFunctionsTestDF
RUN: %no_fuzzer_cpp_compiler -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp %S/Labels20Test.cpp %t-DataFlow.o -o %t-Labels20TestDF
RUN: %no_fuzzer_cpp_compiler -c -fno-sanitize=all -fPIC %S/../../lib/fuzzer/dataflow/DataFlowCallbacks.cpp -o %t-DataFlowCallbacks.o
RUN: %no_fuzzer_cpp_compiler -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp %S/ThreeFunctionsTest.cpp %t-DataFlow*.o -o %t-ThreeFunctionsTestDF
RUN: %no_fuzzer_cpp_compiler -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp %S/Labels20Test.cpp %t-DataFlow*.o -o %t-Labels20TestDF
RUN: %cpp_compiler %S/ThreeFunctionsTest.cpp -o %t-ThreeFunctionsTest
# Dump the function list.

View File

@ -3,7 +3,8 @@ REQUIRES: linux, x86_64
# Build the tracer and the test.
RUN: %no_fuzzer_cpp_compiler -c -fno-sanitize=all -fsanitize=dataflow %S/../../lib/fuzzer/dataflow/DataFlow.cpp -o %t-DataFlow.o
RUN: %no_fuzzer_cpp_compiler -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp %S/OnlySomeBytesTest.cpp %t-DataFlow.o -o %t-DFT
RUN: %no_fuzzer_cpp_compiler -c -fno-sanitize=all -fPIC %S/../../lib/fuzzer/dataflow/DataFlowCallbacks.cpp -o %t-DataFlowCallbacks.o
RUN: %no_fuzzer_cpp_compiler -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp %S/OnlySomeBytesTest.cpp %t-DataFlow*.o -o %t-DFT
RUN: %cpp_compiler %S/OnlySomeBytesTest.cpp -o %t-Fuzz
# Test that the fork mode can collect and use the DFT

View File

@ -3,7 +3,8 @@ REQUIRES: linux, x86_64
# Build the tracer and the test.
RUN: %no_fuzzer_cpp_compiler -c -fno-sanitize=all -fsanitize=dataflow %S/../../lib/fuzzer/dataflow/DataFlow.cpp -o %t-DataFlow.o
RUN: %no_fuzzer_cpp_compiler -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp %S/OnlySomeBytesTest.cpp %t-DataFlow.o -o %t-DFT
RUN: %no_fuzzer_cpp_compiler -c -fno-sanitize=all -fPIC %S/../../lib/fuzzer/dataflow/DataFlowCallbacks.cpp -o %t-DataFlowCallbacks.o
RUN: %no_fuzzer_cpp_compiler -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp %S/OnlySomeBytesTest.cpp %t-DataFlow*.o -o %t-DFT
RUN: %cpp_compiler %S/OnlySomeBytesTest.cpp -o %t-Fuzz
# Prepare the inputs.