[libFuzzer] change the way we load the seed corpora: instead of loading all files and then executing all files, load and execute them one-by-one. This should reduce the memory usage in many cases.

llvm-svn: 312033
This commit is contained in:
Kostya Serebryany 2017-08-29 20:51:24 +00:00
parent c29c2c9b0c
commit 4faeb87ebe
6 changed files with 68 additions and 53 deletions

View File

@ -53,6 +53,7 @@ void RawPrint(const char *Str);
// Platform specific functions:
// The stat()-based implementation returns whether Path is a regular file
// (S_ISREG on st_mode).
bool IsFile(const std::string &Path);
// The stat()-based implementation returns st_size for Path, or 0 when stat()
// fails.
size_t FileSize(const std::string &Path);
// NOTE(review): presumably appends the files found under Dir (recursively) to
// *V; the implementation is not fully visible here -- confirm.
void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
Vector<std::string> *V, bool TopDir);

View File

@ -32,6 +32,13 @@ bool IsFile(const std::string &Path) {
return S_ISREG(St.st_mode);
}
// Returns the size in bytes that stat() reports for Path, or 0 when stat()
// fails (so a missing file and an empty file are indistinguishable here).
size_t FileSize(const std::string &Path) {
  struct stat Info;
  const bool StatFailed = stat(Path.c_str(), &Info) != 0;
  return StatFailed ? 0 : static_cast<size_t>(Info.st_size);
}
void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
Vector<std::string> *V, bool TopDir) {
auto E = GetEpoch(Dir);

View File

@ -38,7 +38,6 @@ public:
void Loop(const Vector<std::string> &CorpusDirs);
// Lists the files under each directory in CorpusDirs, then reads and executes
// the seed inputs one at a time.
void ReadAndExecuteSeedCorpora(const Vector<std::string> &CorpusDirs);
void MinimizeCrashLoop(const Unit &U);
// Optionally shuffles *V, runs every unit in it once (clearing each unit
// afterwards), and exits if the corpus is still empty.
void ShuffleAndMinimize(UnitVector *V);
void RereadOutputCorpus(size_t MaxSize);
size_t secondsSinceProcessStartUp() {
@ -103,7 +102,6 @@ private:
void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix);
void PrintStats(const char *Where, const char *End = "\n", size_t Units = 0);
void PrintStatusForNewUnit(const Unit &U, const char *Text);
// Randomly shuffles *V and, when Options.PreferSmall is set, stable-sorts it
// by unit size.
void ShuffleCorpus(UnitVector *V);
void CheckExitOnSrcPosOrItem();
static void StaticDeathCallback();

View File

@ -371,39 +371,6 @@ void Fuzzer::RereadOutputCorpus(size_t MaxSize) {
PrintStats("RELOAD");
}
// Randomizes the order of the units in *V using the generator from
// MD.GetRand(). When Options.PreferSmall is set, the shuffled units are then
// stable-sorted by size, so equally sized units keep their shuffled order.
void Fuzzer::ShuffleCorpus(UnitVector *V) {
  std::shuffle(V->begin(), V->end(), MD.GetRand());
  if (!Options.PreferSmall)
    return;
  auto SmallerFirst = [](const Unit &Lhs, const Unit &Rhs) {
    return Lhs.size() < Rhs.size();
  };
  std::stable_sort(V->begin(), V->end(), SmallerFirst);
}
// Executes every unit of *InitialCorpus once, in order (after an optional
// shuffle), clearing each unit as soon as it has been run. Terminates the
// process with exit code 1 if Corpus is still empty afterwards.
void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) {
  Printf("#0\tREAD units: %zd; rss: %zdMb\n", InitialCorpus->size(),
         GetPeakRSSMb());
  if (Options.ShuffleAtStartUp)
    ShuffleCorpus(InitialCorpus);

  // Test the callback with empty input and never try it again.
  uint8_t Placeholder;
  ExecuteCallback(&Placeholder, 0);

  for (auto &Seed : *InitialCorpus) {
    auto *Bytes = Seed.data();
    const auto NumBytes = Seed.size();
    RunOne(Bytes, NumBytes);
    CheckExitOnSrcPosOrItem();
    TryDetectingAMemoryLeak(Bytes, NumBytes,
                            /*DuringInitialCorpusExecution*/ true);
    // Drop the unit's contents now that it has been executed.
    Seed.clear();
  }
  PrintStats("INITED");

  if (!Corpus.empty())
    return;
  Printf("ERROR: no interesting inputs were found. "
         "Is the code instrumented for coverage? Exiting.\n");
  exit(1);
}
void Fuzzer::PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size) {
auto TimeOfUnit =
duration_cast<seconds>(UnitStopTime - UnitStartTime).count();
@ -628,26 +595,68 @@ void Fuzzer::MutateAndTestOne() {
// NOTE(review): this span looks like a diff hunk rendered without +/- markers:
// statements that use InitialCorpus / ReadDirToVectorOfUnits /
// ShuffleAndMinimize are interleaved with statements that use SizedFiles, and
// the braces below do not balance as shown. Confirm against the real file
// before changing any code here.
//
// Lists the files under each directory in CorpusDirs, records every file whose
// FileSize() is non-zero, derives the maximum input length when Options.MaxLen
// is 0, and then reads and runs the seed inputs one at a time.
void Fuzzer::ReadAndExecuteSeedCorpora(const Vector<std::string> &CorpusDirs) {
// Upper bound (1 << 20 bytes) and lower bound (4096 bytes) used when the
// maximum input length is derived from the seed sizes below.
const size_t kMaxSaneLen = 1 << 20;
const size_t kMinDefaultLen = 4096;
size_t TemporaryMaxLen = Options.MaxLen ? Options.MaxLen : kMaxSaneLen;
UnitVector InitialCorpus;
for (auto &Inp : CorpusDirs) {
Printf("Loading corpus dir: %s\n", Inp.c_str());
ReadDirToVectorOfUnits(Inp.c_str(), &InitialCorpus, nullptr,
TemporaryMaxLen, /*ExitOnError=*/false);
// A file path paired with the size FileSize() reported for it.
struct SizedFile {
std::string File;
size_t Size;
};
Vector<SizedFile> SizedFiles;
size_t MaxSize = 0;
// -1 converts to the largest size_t value, so the first Min() replaces it.
size_t MinSize = -1;
size_t TotalSize = 0;
for (auto &Dir : CorpusDirs) {
Vector<std::string> Files;
ListFilesInDirRecursive(Dir, 0, &Files, /*TopDir*/true);
Printf("INFO: % 8zd files found in %s\n", Files.size(), Dir.c_str());
for (auto &File : Files) {
// A FileSize() of 0 (stat() failure or an empty file) skips the file.
if (size_t Size = FileSize(File)) {
MaxSize = Max(Size, MaxSize);
MinSize = Min(Size, MinSize);
TotalSize += Size;
SizedFiles.push_back({File, Size});
}
}
}
if (Options.MaxLen == 0) {
size_t MaxLen = 0;
for (auto &U : InitialCorpus)
MaxLen = std::max(U.size(), MaxLen);
SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxLen), kMaxSaneLen));
// No explicit MaxLen: use the largest seed size, clamped to the range
// [kMinDefaultLen, kMaxSaneLen].
if (Options.MaxLen == 0)
SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxSize), kMaxSaneLen));
assert(MaxInputLen > 0);
if (SizedFiles.empty()) {
Printf("INFO: A corpus is not provided, starting from an empty corpus\n");
// With no seed files, run a single one-byte unit containing '\n'.
Unit U({'\n'}); // Valid ASCII input.
RunOne(U.data(), U.size());
} else {
Printf("INFO: seed corpus: files: %zd min: %zdb max: %zdb total: %zdb"
" rss: %zdMb\n",
SizedFiles.size(), MinSize, MaxSize, TotalSize, GetPeakRSSMb());
// Shuffle first; the stable sort then keeps the shuffled order among files
// of equal size.
if (Options.ShuffleAtStartUp)
std::shuffle(SizedFiles.begin(), SizedFiles.end(), MD.GetRand());
if (Options.PreferSmall)
std::stable_sort(
SizedFiles.begin(), SizedFiles.end(),
[](const SizedFile &A, const SizedFile &B) { return A.Size < B.Size; });
// Load and execute inputs one by one.
for (auto &SF : SizedFiles) {
// U is local to this iteration, so only one seed's contents are held at a
// time; the assert checks the unit is no longer than MaxInputLen.
auto U = FileToVector(SF.File, MaxInputLen);
assert(U.size() <= MaxInputLen);
RunOne(U.data(), U.size());
CheckExitOnSrcPosOrItem();
TryDetectingAMemoryLeak(U.data(), U.size(),
/*DuringInitialCorpusExecution*/ true);
}
}
if (InitialCorpus.empty()) {
InitialCorpus.push_back(Unit({'\n'})); // Valid ASCII input.
if (Options.Verbosity)
Printf("INFO: A corpus is not provided, starting from an empty corpus\n");
// Test the callback with empty input and never try it again.
uint8_t dummy;
ExecuteCallback(&dummy, 0);
PrintStats("INITED");
// Exit with status 1 if Corpus is still empty at this point.
if (Corpus.empty()) {
Printf("ERROR: no interesting inputs were found. "
"Is the code instrumented for coverage? Exiting.\n");
exit(1);
}
ShuffleAndMinimize(&InitialCorpus);
}
void Fuzzer::Loop(const Vector<std::string> &CorpusDirs) {

View File

@ -6,7 +6,7 @@ RUN: echo a > %t/SUB1/a
RUN: echo b > %t/SUB1/SUB2/b
RUN: echo c > %t/SUB1/SUB2/SUB3/c
RUN: %t-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=SUBDIRS
SUBDIRS: READ units: 3
SUBDIRS: INFO: seed corpus: files: 3 min: 2b max: 2b total: 6b
RUN: echo -n zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz > %t/SUB1/f64
RUN: cat %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 > %t/SUB1/f256
RUN: cat %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 > %t/SUB1/f1024

View File

@ -9,7 +9,7 @@ CHECK: INFO: found item with checksum '0eb8e4ed029b774d80f2b66408203801cb982a60'
# Test that reduce_inputs deletes redundant files in the corpus.
RUN: %t-ShrinkControlFlowSimpleTest -runs=0 %t/C 2>&1 | FileCheck %s --check-prefix=COUNT
COUNT: READ units: 4
COUNT: seed corpus: files: 4
# a bit longer test
RUN: %t-ShrinkControlFlowTest -exit_on_item=0eb8e4ed029b774d80f2b66408203801cb982a60 -seed=1 -runs=1000000 2>&1 | FileCheck %s