[libFuzzer] When using data-flow-trace (DFT), only load the DFT for the files present in the corpus

llvm-svn: 361579
This commit is contained in:
Kostya Serebryany 2019-05-24 00:43:52 +00:00
parent ab09cca310
commit 060f4b48d5
4 changed files with 18 additions and 11 deletions

View File

@ -100,6 +100,7 @@ void DataFlowTrace::ReadCoverage(const std::string &DirPath) {
for (auto &SF : Files) {
auto Name = Basename(SF.File);
if (Name == kFunctionsTxt) continue;
if (!CorporaHashes.count(Name)) continue;
std::ifstream IF(SF.File);
Coverage.AppendCoverage(IF);
}
@ -154,9 +155,8 @@ static bool ParseDFTLine(const std::string &Line, size_t *FunctionNum,
return true;
}
bool DataFlowTrace::Init(const std::string &DirPath,
std::string *FocusFunction,
Random &Rand) {
bool DataFlowTrace::Init(const std::string &DirPath, std::string *FocusFunction,
Vector<SizedFile> &CorporaFiles, Random &Rand) {
if (DirPath.empty()) return false;
Printf("INFO: DataFlowTrace: reading from '%s'\n", DirPath.c_str());
Vector<SizedFile> Files;
@ -165,6 +165,10 @@ bool DataFlowTrace::Init(const std::string &DirPath,
size_t FocusFuncIdx = SIZE_MAX;
Vector<std::string> FunctionNames;
// Collect the hashes of the corpus files.
for (auto &SF : CorporaFiles)
CorporaHashes.insert(Hash(FileToVector(SF.File)));
// Read functions.txt
std::ifstream IF(DirPlusFile(DirPath, kFunctionsTxt));
size_t NumFunctions = 0;
@ -211,6 +215,7 @@ bool DataFlowTrace::Init(const std::string &DirPath,
for (auto &SF : Files) {
auto Name = Basename(SF.File);
if (Name == kFunctionsTxt) continue;
if (!CorporaHashes.count(Name)) continue; // not in the corpus.
NumTraceFiles++;
// Printf("=== %s\n", Name.c_str());
std::ifstream IF(SF.File);
@ -231,11 +236,10 @@ bool DataFlowTrace::Init(const std::string &DirPath,
}
}
}
assert(NumTraceFiles == Files.size() - 1);
Printf("INFO: DataFlowTrace: %zd trace files, %zd functions, "
"%zd traces with focus function\n",
NumTraceFiles, NumFunctions, NumTracesWithFocusFunction);
return true;
return NumTraceFiles > 0;
}
int CollectDataFlow(const std::string &DFTBinary, const std::string &DirPath,
@ -311,7 +315,7 @@ int CollectDataFlow(const std::string &DFTBinary, const std::string &DirPath,
}
RemoveFile(Temp);
// Write functions.txt if it's currently empty or doesn't exist.
auto FunctionsTxtPath = DirPlusFile(DirPath, "functions.txt");
auto FunctionsTxtPath = DirPlusFile(DirPath, kFunctionsTxt);
if (FileToString(FunctionsTxtPath).empty()) {
Command Cmd;
Cmd.addArgument(DFTBinary);

View File

@ -32,6 +32,7 @@
#include "FuzzerIO.h"
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <string>
@ -112,7 +113,7 @@ class DataFlowTrace {
public:
void ReadCoverage(const std::string &DirPath);
bool Init(const std::string &DirPath, std::string *FocusFunction,
Random &Rand);
Vector<SizedFile> &CorporaFiles, Random &Rand);
// Removes every entry from Traces (the input-sha1 => DFT map).
void Clear() { Traces.clear(); }
const Vector<uint8_t> *Get(const std::string &InputSha1) const {
auto It = Traces.find(InputSha1);
@ -125,6 +126,7 @@ class DataFlowTrace {
// Input's sha1 => DFT for the FocusFunction.
std::unordered_map<std::string, Vector<uint8_t> > Traces;
BlockCoverage Coverage;
std::unordered_set<std::string> CorporaHashes;
};
} // namespace fuzzer

View File

@ -157,9 +157,6 @@ Fuzzer::Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD,
AllocateCurrentUnitData();
CurrentUnitSize = 0;
memset(BaseSha1, 0, sizeof(BaseSha1));
auto FocusFunctionOrAuto = Options.FocusFunction;
DFT.Init(Options.DataFlowTrace, &FocusFunctionOrAuto , MD.GetRand());
TPC.SetFocusFunction(FocusFunctionOrAuto);
}
// Out-of-line destructor; the body is empty.
Fuzzer::~Fuzzer() {}
@ -789,6 +786,10 @@ void Fuzzer::ReadAndExecuteSeedCorpora(Vector<SizedFile> &CorporaFiles) {
}
void Fuzzer::Loop(Vector<SizedFile> &CorporaFiles) {
auto FocusFunctionOrAuto = Options.FocusFunction;
DFT.Init(Options.DataFlowTrace, &FocusFunctionOrAuto, CorporaFiles,
MD.GetRand());
TPC.SetFocusFunction(FocusFunctionOrAuto);
ReadAndExecuteSeedCorpora(CorporaFiles);
DFT.Clear(); // No need for DFT any more.
TPC.SetPrintNewPCs(Options.PrintNewCovPcs);

View File

@ -92,7 +92,7 @@ RUN: %t-ThreeFunctionsTest -collect_data_flow=%t-ThreeFunctionsTestDF -data_flo
# Test that we can run collect_data_flow on the entire corpus dir
RUN: rm -rf %t/OUT
RUN: %t-ThreeFunctionsTest -collect_data_flow=%t-ThreeFunctionsTestDF -data_flow_trace=%t/OUT %t/IN
RUN: %t-ThreeFunctionsTest -data_flow_trace=%t/OUT -runs=0 -focus_function=Func2 2>&1 | FileCheck %s --check-prefix=USE_DATA_FLOW_TRACE
RUN: %t-ThreeFunctionsTest -data_flow_trace=%t/OUT -runs=0 -focus_function=Func2 %t/IN 2>&1 | FileCheck %s --check-prefix=USE_DATA_FLOW_TRACE
USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: reading from {{.*}}/OUT