forked from OSchip/llvm-project
[libFuzzer] Change the way we load the seed corpora: instead of loading all files and then executing all of them, load and execute them one by one. This should reduce memory usage in many cases.
llvm-svn: 312033
This commit is contained in:
parent
c29c2c9b0c
commit
4faeb87ebe
|
@ -53,6 +53,7 @@ void RawPrint(const char *Str);
|
|||
|
||||
// Platform specific functions:
|
||||
bool IsFile(const std::string &Path);
|
||||
size_t FileSize(const std::string &Path);
|
||||
|
||||
void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
|
||||
Vector<std::string> *V, bool TopDir);
|
||||
|
|
|
@ -32,6 +32,13 @@ bool IsFile(const std::string &Path) {
|
|||
return S_ISREG(St.st_mode);
|
||||
}
|
||||
|
||||
// Returns the size of the file at `Path`, taken from the `st_size` field
// filled in by stat(). Returns 0 when stat() reports failure, so a caller
// cannot tell a failed stat() apart from a file whose size is 0.
size_t FileSize(const std::string &Path) {
|
||||
struct stat St;
|
||||
// stat() returns non-zero on failure; treat that as "size 0".
if (stat(Path.c_str(), &St))
|
||||
return 0;
|
||||
// st_size is implicitly converted to the size_t return type here.
return St.st_size;
|
||||
}
|
||||
|
||||
void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
|
||||
Vector<std::string> *V, bool TopDir) {
|
||||
auto E = GetEpoch(Dir);
|
||||
|
|
|
@ -38,7 +38,6 @@ public:
|
|||
void Loop(const Vector<std::string> &CorpusDirs);
|
||||
void ReadAndExecuteSeedCorpora(const Vector<std::string> &CorpusDirs);
|
||||
void MinimizeCrashLoop(const Unit &U);
|
||||
void ShuffleAndMinimize(UnitVector *V);
|
||||
void RereadOutputCorpus(size_t MaxSize);
|
||||
|
||||
size_t secondsSinceProcessStartUp() {
|
||||
|
@ -103,7 +102,6 @@ private:
|
|||
void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix);
|
||||
void PrintStats(const char *Where, const char *End = "\n", size_t Units = 0);
|
||||
void PrintStatusForNewUnit(const Unit &U, const char *Text);
|
||||
void ShuffleCorpus(UnitVector *V);
|
||||
void CheckExitOnSrcPosOrItem();
|
||||
|
||||
static void StaticDeathCallback();
|
||||
|
|
|
@ -371,39 +371,6 @@ void Fuzzer::RereadOutputCorpus(size_t MaxSize) {
|
|||
PrintStats("RELOAD");
|
||||
}
|
||||
|
||||
// Randomly reorders the units in `*V` in place using the generator returned
// by MD.GetRand(). When Options.PreferSmall is set, the shuffled units are
// then ordered by ascending size.
void Fuzzer::ShuffleCorpus(UnitVector *V) {
|
||||
std::shuffle(V->begin(), V->end(), MD.GetRand());
|
||||
// stable_sort keeps the shuffled relative order among units of equal size,
// so only the order across different sizes is made deterministic.
if (Options.PreferSmall)
|
||||
std::stable_sort(V->begin(), V->end(), [](const Unit &A, const Unit &B) {
|
||||
return A.size() < B.size();
|
||||
});
|
||||
}
|
||||
|
||||
// Executes every unit of `*InitialCorpus` once and then checks that the
// fuzzer's own `Corpus` is non-empty.
//
// Steps, in order:
//   1. Print the number of units read and the peak RSS.
//   2. If Options.ShuffleAtStartUp is set, reorder the units via
//      ShuffleCorpus().
//   3. Invoke the callback once with a zero-length input.
//   4. Run each unit through RunOne(), CheckExitOnSrcPosOrItem() and
//      TryDetectingAMemoryLeak(), then clear that unit.
//   5. Print the "INITED" stats line and exit(1) if `Corpus` is still empty.
//
// `InitialCorpus` is modified: it may be reordered, and every unit in it is
// cleared by the time this function returns.
void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) {
|
||||
Printf("#0\tREAD units: %zd; rss: %zdMb\n", InitialCorpus->size(),
|
||||
GetPeakRSSMb());
|
||||
if (Options.ShuffleAtStartUp)
|
||||
ShuffleCorpus(InitialCorpus);
|
||||
|
||||
// Test the callback with empty input and never try it again.
|
||||
// `dummy` is left uninitialized; the call passes a size of 0, so it is
// presumably only there to supply a non-null pointer.
uint8_t dummy;
|
||||
ExecuteCallback(&dummy, 0);
|
||||
|
||||
for (auto &U : *InitialCorpus) {
|
||||
RunOne(U.data(), U.size());
|
||||
CheckExitOnSrcPosOrItem();
|
||||
TryDetectingAMemoryLeak(U.data(), U.size(),
|
||||
/*DuringInitialCorpusExecution*/ true);
|
||||
// Drop the unit's contents once it has been executed.
U.clear();
|
||||
}
|
||||
PrintStats("INITED");
|
||||
// `Corpus` is the fuzzer's member corpus, not `InitialCorpus`; if it is
// still empty after running every initial unit, report an error and exit.
if (Corpus.empty()) {
|
||||
Printf("ERROR: no interesting inputs were found. "
|
||||
"Is the code instrumented for coverage? Exiting.\n");
|
||||
exit(1);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void Fuzzer::PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size) {
|
||||
auto TimeOfUnit =
|
||||
duration_cast<seconds>(UnitStopTime - UnitStartTime).count();
|
||||
|
@ -628,26 +595,68 @@ void Fuzzer::MutateAndTestOne() {
|
|||
void Fuzzer::ReadAndExecuteSeedCorpora(const Vector<std::string> &CorpusDirs) {
|
||||
const size_t kMaxSaneLen = 1 << 20;
|
||||
const size_t kMinDefaultLen = 4096;
|
||||
size_t TemporaryMaxLen = Options.MaxLen ? Options.MaxLen : kMaxSaneLen;
|
||||
UnitVector InitialCorpus;
|
||||
for (auto &Inp : CorpusDirs) {
|
||||
Printf("Loading corpus dir: %s\n", Inp.c_str());
|
||||
ReadDirToVectorOfUnits(Inp.c_str(), &InitialCorpus, nullptr,
|
||||
TemporaryMaxLen, /*ExitOnError=*/false);
|
||||
struct SizedFile {
|
||||
std::string File;
|
||||
size_t Size;
|
||||
};
|
||||
Vector<SizedFile> SizedFiles;
|
||||
size_t MaxSize = 0;
|
||||
size_t MinSize = -1;
|
||||
size_t TotalSize = 0;
|
||||
for (auto &Dir : CorpusDirs) {
|
||||
Vector<std::string> Files;
|
||||
ListFilesInDirRecursive(Dir, 0, &Files, /*TopDir*/true);
|
||||
Printf("INFO: % 8zd files found in %s\n", Files.size(), Dir.c_str());
|
||||
for (auto &File : Files) {
|
||||
if (size_t Size = FileSize(File)) {
|
||||
MaxSize = Max(Size, MaxSize);
|
||||
MinSize = Min(Size, MinSize);
|
||||
TotalSize += Size;
|
||||
SizedFiles.push_back({File, Size});
|
||||
}
|
||||
}
|
||||
}
|
||||
if (Options.MaxLen == 0) {
|
||||
size_t MaxLen = 0;
|
||||
for (auto &U : InitialCorpus)
|
||||
MaxLen = std::max(U.size(), MaxLen);
|
||||
SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxLen), kMaxSaneLen));
|
||||
if (Options.MaxLen == 0)
|
||||
SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxSize), kMaxSaneLen));
|
||||
assert(MaxInputLen > 0);
|
||||
|
||||
if (SizedFiles.empty()) {
|
||||
Printf("INFO: A corpus is not provided, starting from an empty corpus\n");
|
||||
Unit U({'\n'}); // Valid ASCII input.
|
||||
RunOne(U.data(), U.size());
|
||||
} else {
|
||||
Printf("INFO: seed corpus: files: %zd min: %zdb max: %zdb total: %zdb"
|
||||
" rss: %zdMb\n",
|
||||
SizedFiles.size(), MinSize, MaxSize, TotalSize, GetPeakRSSMb());
|
||||
if (Options.ShuffleAtStartUp)
|
||||
std::shuffle(SizedFiles.begin(), SizedFiles.end(), MD.GetRand());
|
||||
|
||||
if (Options.PreferSmall)
|
||||
std::stable_sort(
|
||||
SizedFiles.begin(), SizedFiles.end(),
|
||||
[](const SizedFile &A, const SizedFile &B) { return A.Size < B.Size; });
|
||||
|
||||
// Load and execute inputs one by one.
|
||||
for (auto &SF : SizedFiles) {
|
||||
auto U = FileToVector(SF.File, MaxInputLen);
|
||||
assert(U.size() <= MaxInputLen);
|
||||
RunOne(U.data(), U.size());
|
||||
CheckExitOnSrcPosOrItem();
|
||||
TryDetectingAMemoryLeak(U.data(), U.size(),
|
||||
/*DuringInitialCorpusExecution*/ true);
|
||||
}
|
||||
}
|
||||
|
||||
if (InitialCorpus.empty()) {
|
||||
InitialCorpus.push_back(Unit({'\n'})); // Valid ASCII input.
|
||||
if (Options.Verbosity)
|
||||
Printf("INFO: A corpus is not provided, starting from an empty corpus\n");
|
||||
// Test the callback with empty input and never try it again.
|
||||
uint8_t dummy;
|
||||
ExecuteCallback(&dummy, 0);
|
||||
|
||||
PrintStats("INITED");
|
||||
if (Corpus.empty()) {
|
||||
Printf("ERROR: no interesting inputs were found. "
|
||||
"Is the code instrumented for coverage? Exiting.\n");
|
||||
exit(1);
|
||||
}
|
||||
ShuffleAndMinimize(&InitialCorpus);
|
||||
}
|
||||
|
||||
void Fuzzer::Loop(const Vector<std::string> &CorpusDirs) {
|
||||
|
|
|
@ -6,7 +6,7 @@ RUN: echo a > %t/SUB1/a
|
|||
RUN: echo b > %t/SUB1/SUB2/b
|
||||
RUN: echo c > %t/SUB1/SUB2/SUB3/c
|
||||
RUN: %t-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=SUBDIRS
|
||||
SUBDIRS: READ units: 3
|
||||
SUBDIRS: INFO: seed corpus: files: 3 min: 2b max: 2b total: 6b
|
||||
RUN: echo -n zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz > %t/SUB1/f64
|
||||
RUN: cat %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 > %t/SUB1/f256
|
||||
RUN: cat %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 > %t/SUB1/f1024
|
||||
|
|
|
@ -9,7 +9,7 @@ CHECK: INFO: found item with checksum '0eb8e4ed029b774d80f2b66408203801cb982a60'
|
|||
|
||||
# Test that reduce_inputs deletes redundant files in the corpus.
|
||||
RUN: %t-ShrinkControlFlowSimpleTest -runs=0 %t/C 2>&1 | FileCheck %s --check-prefix=COUNT
|
||||
COUNT: READ units: 4
|
||||
COUNT: seed corpus: files: 4
|
||||
|
||||
# a bit longer test
|
||||
RUN: %t-ShrinkControlFlowTest -exit_on_item=0eb8e4ed029b774d80f2b66408203801cb982a60 -seed=1 -runs=1000000 2>&1 | FileCheck %s
|
||||
|
|
Loading…
Reference in New Issue