[libFuzzer] make the corpus elements aware of their data flow traces

llvm-svn: 334158
This commit is contained in:
Kostya Serebryany 2018-06-07 01:40:20 +00:00
parent 177b458c8a
commit 67af99235f
6 changed files with 72 additions and 5 deletions

View File

@ -12,6 +12,7 @@
#ifndef LLVM_FUZZER_CORPUS
#define LLVM_FUZZER_CORPUS
#include "FuzzerDataFlowTrace.h"
#include "FuzzerDefs.h"
#include "FuzzerIO.h"
#include "FuzzerRandom.h"
@ -37,6 +38,7 @@ struct InputInfo {
bool Reduced = false;
bool HasFocusFunction = false;
Vector<uint32_t> UniqFeatureSet;
Vector<bool> DataFlowTraceForFocusFunction;
};
class InputCorpus {
@ -76,10 +78,17 @@ class InputCorpus {
});
}
size_t NumInputsWithDataFlowTrace() {
return std::count_if(Inputs.begin(), Inputs.end(), [](const InputInfo *II) {
return !II->DataFlowTraceForFocusFunction.empty();
});
}
bool empty() const { return Inputs.empty(); }
const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
void AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile,
bool HasFocusFunction, const Vector<uint32_t> &FeatureSet) {
bool HasFocusFunction, const Vector<uint32_t> &FeatureSet,
const DataFlowTrace &DFT) {
assert(!U.empty());
if (FeatureDebug)
Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures);
@ -92,7 +101,11 @@ class InputCorpus {
II.HasFocusFunction = HasFocusFunction;
std::sort(II.UniqFeatureSet.begin(), II.UniqFeatureSet.end());
ComputeSHA1(U.data(), U.size(), II.Sha1);
Hashes.insert(Sha1ToString(II.Sha1));
auto Sha1Str = Sha1ToString(II.Sha1);
Hashes.insert(Sha1Str);
if (HasFocusFunction)
if (auto V = DFT.Get(Sha1Str))
II.DataFlowTraceForFocusFunction = *V;
UpdateCorpusDistribution();
PrintCorpus();
// ValidateFeatureSet();

View File

@ -73,6 +73,7 @@ void DataFlowTrace::Init(const std::string &DirPath,
ParseError("the trace should contain only 0 or 1");
V[I] = Beg[I] == '1';
}
Traces[Name] = V;
// Print just a few small traces.
if (NumTracesWithFocusFunction <= 3 && Len <= 16)
Printf("%s => |%s|\n", Name.c_str(), L.c_str() + SpacePos + 1);

View File

@ -31,9 +31,25 @@
#include "FuzzerDefs.h"
#include <unordered_map>
#include <vector>
#include <string>
namespace fuzzer {
struct DataFlowTrace {
class DataFlowTrace {
public:
void Init(const std::string &DirPath, const std::string &FocusFunction);
void Clear() { Traces.clear(); }
const Vector<bool> *Get(const std::string &InputSha1) const {
auto It = Traces.find(InputSha1);
if (It != Traces.end())
return &It->second;
return nullptr;
}
private:
// Input's sha1 => DFT for the FocusFunction.
std::unordered_map<std::string, Vector<bool> > Traces;
};
} // namespace fuzzer

View File

@ -469,10 +469,11 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
TPC.UpdateObservedPCs();
Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile,
TPC.ObservedFocusFunction(),
UniqFeatureSetTmp);
UniqFeatureSetTmp, DFT);
return true;
}
if (II && FoundUniqFeaturesOfII &&
II->DataFlowTraceForFocusFunction.empty() &&
FoundUniqFeaturesOfII == II->UniqFeatureSet.size() &&
II->U.size() > Size) {
Corpus.Replace(II, {Data, Data + Size});
@ -739,6 +740,9 @@ void Fuzzer::ReadAndExecuteSeedCorpora(const Vector<std::string> &CorpusDirs) {
if (!Options.FocusFunction.empty())
Printf("INFO: %zd/%zd inputs touch the focus function\n",
Corpus.NumInputsThatTouchFocusFunction(), Corpus.size());
if (!Options.DataFlowTrace.empty())
Printf("INFO: %zd/%zd inputs have the Data Flow Trace\n",
Corpus.NumInputsWithDataFlowTrace(), Corpus.size());
if (Corpus.empty() && Options.MaxNumberOfRuns) {
Printf("ERROR: no interesting inputs were found. "
@ -749,6 +753,7 @@ void Fuzzer::ReadAndExecuteSeedCorpora(const Vector<std::string> &CorpusDirs) {
void Fuzzer::Loop(const Vector<std::string> &CorpusDirs) {
ReadAndExecuteSeedCorpora(CorpusDirs);
DFT.Clear(); // No need for DFT any more.
TPC.SetPrintNewPCs(Options.PrintNewCovPcs);
TPC.SetPrintNewFuncs(Options.PrintNewCovFuncs);
system_clock::time_point LastCorpusReload = system_clock::now();

View File

@ -582,12 +582,13 @@ TEST(FuzzerUtil, Base64) {
}
TEST(Corpus, Distribution) {
DataFlowTrace DFT;
Random Rand(0);
std::unique_ptr<InputCorpus> C(new InputCorpus(""));
size_t N = 10;
size_t TriesPerUnit = 1<<16;
for (size_t i = 0; i < N; i++)
C->AddToCorpus(Unit{ static_cast<uint8_t>(i) }, 1, false, false, {});
C->AddToCorpus(Unit{ static_cast<uint8_t>(i) }, 1, false, false, {}, DFT);
Vector<size_t> Hist(N);
for (size_t i = 0; i < N * TriesPerUnit; i++) {

View File

@ -0,0 +1,31 @@
# Tests the data flow tracer.
REQUIRES: linux
# Build the tracer and the test.
RUN: %no_fuzzer_cpp_compiler -c -fno-sanitize=all -fsanitize=dataflow %S/../../lib/fuzzer/dataflow/DataFlow.cpp -o %t-DataFlow.o
RUN: %no_fuzzer_cpp_compiler -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp %S/OnlySomeBytesTest.cpp %t-DataFlow.o -o %t-DFT
RUN: %cpp_compiler %S/OnlySomeBytesTest.cpp -o %t-Fuzz
# Prepare the inputs.
RUN: rm -rf %t/IN
RUN: mkdir -p %t/IN
RUN: echo -n 0123456789012345678901234567890123456789012345678901234567891234 > %t/IN/6
RUN: cat %t/IN/6 %t/IN/6 %t/IN/6 %t/IN/6 > %t/IN/8
RUN: cat %t/IN/8 %t/IN/8 %t/IN/8 %t/IN/8 > %t/IN/10
RUN: cat %t/IN/10 %t/IN/10 %t/IN/10 %t/IN/10 > %t/IN/12
# %t/IN/12 is 4096 bytes-long.
RUN: %t-Fuzz -focus_function=f0 -runs=0 %t/IN 2>&1 | FileCheck %s --check-prefix=NO_FOCUSED_INPUT
NO_FOCUSED_INPUT: INFO: 0/2 inputs touch the focus function
RUN: (echo -n ABC; cat %t/IN/12) > %t/IN/ABC
RUN: %t-Fuzz -focus_function=f0 -runs=0 %t/IN 2>&1 | FileCheck %s --check-prefix=ONE_FOCUSED_INPUT
ONE_FOCUSED_INPUT: INFO: 1/3 inputs touch the focus function
RUN: rm -rf %t/DFT_DIR
RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-DFT %t/IN %t/DFT_DIR
# Repat twice to make sure that the inputs with DFT are not removed from the corpus.
RUN: %t-Fuzz -focus_function=f0 -data_flow_trace=%t/DFT_DIR -runs=100 %t/IN 2>&1 | FileCheck %s --check-prefix=HAVE_DFT
RUN: %t-Fuzz -focus_function=f0 -data_flow_trace=%t/DFT_DIR -runs=100 %t/IN 2>&1 | FileCheck %s --check-prefix=HAVE_DFT
HAVE_DFT: INFO: 1/{{.*}} inputs have the Data Flow Trace