[libFuzzer] refactoring: move the Corpus into a separate class; delete two unused experimental features

llvm-svn: 282042
This commit is contained in:
Kostya Serebryany 2016-09-21 01:04:43 +00:00
parent 3eb83b4a0d
commit 09aa01a6f8
8 changed files with 83 additions and 163 deletions

View File

@ -426,7 +426,6 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
Options.PrintNewCovPcs = Flags.print_pcs;
Options.PrintFinalStats = Flags.print_final_stats;
Options.PrintCoverage = Flags.print_coverage;
Options.TruncateUnits = Flags.truncate_units;
Options.PruneCorpus = Flags.prune_corpus;
if (Flags.use_value_profile)
@ -495,25 +494,26 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
size_t TemporaryMaxLen = Options.MaxLen ? Options.MaxLen : kMaxSaneLen;
F.RereadOutputCorpus(TemporaryMaxLen);
for (auto &inp : *Inputs)
if (inp != Options.OutputCorpus)
F.ReadDir(inp, nullptr, TemporaryMaxLen);
UnitVector InitialCorpus;
for (auto &Inp : *Inputs) {
Printf("Loading corpus dir: %s\n", Inp.c_str());
ReadDirToVectorOfUnits(Inp.c_str(), &InitialCorpus, nullptr, TemporaryMaxLen);
}
if (Options.MaxLen == 0)
F.SetMaxLen(
std::min(std::max(kMinDefaultLen, F.MaxUnitSizeInCorpus()), kMaxSaneLen));
if (Options.MaxLen == 0) {
size_t MaxLen = 0;
for (auto &U : InitialCorpus)
MaxLen = std::max(U.size(), MaxLen);
F.SetMaxLen(std::min(std::max(kMinDefaultLen, MaxLen), kMaxSaneLen));
}
if (F.CorpusSize() == 0) {
F.AddToCorpus(Unit()); // Can't fuzz empty corpus, so add an empty input.
if (InitialCorpus.empty()) {
InitialCorpus.push_back(Unit());
if (Options.Verbosity)
Printf("INFO: A corpus is not provided, starting from an empty corpus\n");
}
F.ShuffleAndMinimize();
if (Flags.drill)
F.Drill();
else
F.Loop();
F.ShuffleAndMinimize(&InitialCorpus);
F.Loop();
if (Flags.verbosity)
Printf("Done %d runs in %zd second(s)\n", F.getTotalNumberOfRuns(),

View File

@ -71,8 +71,6 @@ FUZZER_FLAG_STRING(exact_artifact_path,
"as $(exact_artifact_path). This overrides -artifact_prefix "
"and will not use checksum in the file name. Do not "
"use the same path for several parallel processes.")
FUZZER_FLAG_INT(drill, 0, "Experimental: fuzz using a single unit as the seed "
"corpus, then merge with the initial corpus")
FUZZER_FLAG_INT(output_csv, 0, "Enable pulse output in CSV format.")
FUZZER_FLAG_INT(print_pcs, 0, "If 1, print out newly covered PCs.")
FUZZER_FLAG_INT(print_final_stats, 0, "If 1, print statistics at exit.")
@ -92,7 +90,6 @@ FUZZER_FLAG_INT(detect_leaks, 1, "If 1, and if LeakSanitizer is enabled "
"try to detect memory leaks during fuzzing (i.e. not only at shut down).")
// Adjacent string literals are concatenated verbatim, so the first piece must
// end with a space to keep "upon" and "reaching" apart in the help text.
FUZZER_FLAG_INT(rss_limit_mb, 2048, "If non-zero, the fuzzer will exit upon "
"reaching this limit of RSS memory usage.")
FUZZER_FLAG_INT(truncate_units, 0, "Try truncated units when loading corpus.")
FUZZER_FLAG_INT(prune_corpus, 1, "Prune corpus items without new coverage when "
"loading corpus.")
@ -101,3 +98,5 @@ FUZZER_DEPRECATED_FLAG(save_minimized_corpus)
FUZZER_DEPRECATED_FLAG(sync_command)
FUZZER_DEPRECATED_FLAG(sync_timeout)
FUZZER_DEPRECATED_FLAG(test_single_input)
FUZZER_DEPRECATED_FLAG(drill)
FUZZER_DEPRECATED_FLAG(truncate_units)

View File

@ -30,7 +30,7 @@ bool IsFile(const std::string &Path) {
return S_ISREG(St.st_mode);
}
static long GetEpoch(const std::string &Path) {
long GetEpoch(const std::string &Path) {
struct stat St;
if (stat(Path.c_str(), &St))
return 0; // Can't stat, be conservative.

View File

@ -89,6 +89,7 @@ private:
typedef FixedWord<27> Word; // 28 bytes.
bool IsFile(const std::string &Path);
long GetEpoch(const std::string &Path);
std::string FileToString(const std::string &Path);
Unit FileToVector(const std::string &Path, size_t MaxSize = 0);
void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V,
@ -246,10 +247,44 @@ struct FuzzingOptions {
bool PrintFinalStats = false;
bool PrintCoverage = false;
bool DetectLeaks = true;
bool TruncateUnits = false;
bool PruneCorpus = true;
};
// Per-input record; InputCorpus stores one of these for every unit it keeps.
// At this point it carries only the raw bytes of the input.
struct InputInfo {
Unit U; // The actual input data.
};
class InputCorpus {
public:
InputCorpus() {
Corpus.reserve(1 << 14); // Avoid too many resizes.
}
size_t size() const { return Corpus.size(); }
bool empty() const { return Corpus.empty(); }
const Unit &operator[] (size_t Idx) const { return Corpus[Idx].U; }
void Append(const std::vector<Unit> &V) {
for (auto &U : V)
push_back(U);
}
void push_back(const Unit &U) {
auto H = Hash(U);
if (!Hashes.insert(H).second) return;
InputInfo II;
II.U = U;
Corpus.push_back(II);
}
typedef const std::vector<InputInfo>::const_iterator ConstIter;
ConstIter begin() const { return Corpus.begin(); }
ConstIter end() const { return Corpus.end(); }
bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); }
private:
std::unordered_set<std::string> Hashes;
std::vector<InputInfo> Corpus;
};
class MutationDispatcher {
public:
MutationDispatcher(Random &Rand, const FuzzingOptions &Options);
@ -316,7 +351,7 @@ public:
void ClearAutoDictionary();
void PrintRecommendedDictionary();
void SetCorpus(const std::vector<Unit> *Corpus) { this->Corpus = Corpus; }
void SetCorpus(const InputCorpus *Corpus) { this->Corpus = Corpus; }
Random &GetRand() { return Rand; }
@ -350,7 +385,7 @@ private:
Dictionary PersistentAutoDictionary;
std::vector<Mutator> CurrentMutatorSequence;
std::vector<DictionaryEntry *> CurrentDictionaryEntrySequence;
const std::vector<Unit> *Corpus = nullptr;
const InputCorpus *Corpus = nullptr;
std::vector<uint8_t> MutateInPlaceHere;
std::vector<Mutator> Mutators;
@ -458,21 +493,13 @@ public:
}
size_t ChooseUnitIdxToMutate();
const Unit &ChooseUnitToMutate() { return Corpus[ChooseUnitIdxToMutate()]; };
void TruncateUnits(std::vector<Unit> *NewCorpus);
void Loop();
void Drill();
void ShuffleAndMinimize();
void ShuffleAndMinimize(UnitVector *V);
void InitializeTraceState();
void AssignTaintLabels(uint8_t *Data, size_t Size);
size_t CorpusSize() const { return Corpus.size(); }
size_t MaxUnitSizeInCorpus() const;
void ReadDir(const std::string &Path, long *Epoch, size_t MaxSize) {
Printf("Loading corpus: %s\n", Path.c_str());
ReadDirToVectorOfUnits(Path.c_str(), &Corpus, Epoch, MaxSize);
}
void ReadDir(const std::string &Path, long *Epoch, size_t MaxSize);
void RereadOutputCorpus(size_t MaxSize);
// Save the current corpus to OutputCorpus.
void SaveCorpus();
size_t secondsSinceProcessStartUp() {
return duration_cast<seconds>(system_clock::now() - ProcessStartTime)
@ -561,8 +588,7 @@ private:
bool HasMoreMallocsThanFrees = false;
size_t NumberOfLeakDetectionAttempts = 0;
std::vector<Unit> Corpus;
std::unordered_set<std::string> UnitHashesAddedToCorpus;
InputCorpus Corpus;
std::piecewise_constant_distribution<double> CorpusDistribution;
UserCallback CB;

View File

@ -172,6 +172,8 @@ Fuzzer::Fuzzer(UserCallback CB, MutationDispatcher &MD, FuzzingOptions Options)
}
if (Options.Verbosity)
TPC.PrintModuleInfo();
if (!Options.OutputCorpus.empty() && Options.Reload)
EpochOfLastReadOfOutputCorpus = GetEpoch(Options.OutputCorpus);
}
Fuzzer::~Fuzzer() { }
@ -340,13 +342,6 @@ void Fuzzer::PrintFinalStats() {
Printf("stat::peak_rss_mb: %zd\n", GetPeakRSSMb());
}
// Returns the size of the largest element of Corpus, or 0 if Corpus is empty.
size_t Fuzzer::MaxUnitSizeInCorpus() const {
  size_t Largest = 0;
  for (const auto &Item : Corpus) {
    const size_t Len = Item.size();
    if (Largest < Len)
      Largest = Len;
  }
  return Largest;
}
void Fuzzer::SetMaxLen(size_t MaxLen) {
assert(Options.MaxLen == 0); // Can only reset MaxLen from 0 to non-0.
assert(MaxLen);
@ -354,25 +349,25 @@ void Fuzzer::SetMaxLen(size_t MaxLen) {
Printf("INFO: -max_len is not provided, using %zd\n", Options.MaxLen);
}
// Announces the directory being loaded, reads its units via
// ReadDirToVectorOfUnits (Epoch and MaxSize are forwarded unchanged), and
// adds them to the in-memory corpus. InputCorpus drops units whose hash it
// has already seen.
void Fuzzer::ReadDir(const std::string &Path, long *Epoch, size_t MaxSize) {
  Printf("Loading corpus: %s\n", Path.c_str());
  std::vector<Unit> Loaded;
  ReadDirToVectorOfUnits(Path.c_str(), &Loaded, Epoch, MaxSize);
  Corpus.Append(Loaded);
}
void Fuzzer::RereadOutputCorpus(size_t MaxSize) {
if (Options.OutputCorpus.empty())
return;
if (Options.OutputCorpus.empty() || !Options.Reload) return;
std::vector<Unit> AdditionalCorpus;
ReadDirToVectorOfUnits(Options.OutputCorpus.c_str(), &AdditionalCorpus,
&EpochOfLastReadOfOutputCorpus, MaxSize);
if (Corpus.empty()) {
Corpus = AdditionalCorpus;
return;
}
if (!Options.Reload)
return;
if (Options.Verbosity >= 2)
Printf("Reload: read %zd new units.\n", AdditionalCorpus.size());
for (auto &X : AdditionalCorpus) {
if (X.size() > MaxSize)
X.resize(MaxSize);
if (UnitHashesAddedToCorpus.insert(Hash(X)).second) {
if (!Corpus.HasUnit(X)) {
if (RunOne(X)) {
Corpus.push_back(X);
UpdateCorpusDistribution();
@ -390,51 +385,22 @@ void Fuzzer::ShuffleCorpus(UnitVector *V) {
});
}
// Tries prefixes of corpus items.
// For each fraction in {0.25, 0.5, 0.75, 1.0} and each corpus unit, a prefix
// length is drawn from the mutation dispatcher's Rand with
// (unit size * fraction) as the argument. Zero-length draws are skipped
// without running anything; a non-empty prefix for which RunOne() returns
// true is appended to *NewCorpus.
void Fuzzer::TruncateUnits(std::vector<Unit> *NewCorpus) {
  const std::vector<double> PrefixFractions = {0.25, 0.5, 0.75, 1.0};
  size_t NumAdded = 0;
  for (double Frac : PrefixFractions) {
    for (const auto &Item : Corpus) {
      uint64_t PrefixLen = MD.GetRand()(Item.size() * Frac);
      // Short-circuit keeps RunOne() from being called on an empty prefix.
      if (PrefixLen && RunOne(Item.data(), PrefixLen)) {
        NumAdded++;
        NewCorpus->push_back(Unit(Item.begin(), Item.begin() + PrefixLen));
      }
    }
  }
  if (NumAdded)
    Printf("\tINFO TRUNC %zd units added to in-memory corpus\n", NumAdded);
}
void Fuzzer::ShuffleAndMinimize() {
PrintStats("READ ");
std::vector<Unit> NewCorpus;
void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) {
Printf("#0\tREAD units: %zd\n", InitialCorpus->size());
if (Options.ShuffleAtStartUp)
ShuffleCorpus(&Corpus);
ShuffleCorpus(InitialCorpus);
if (Options.TruncateUnits) {
ResetCoverage();
TruncateUnits(&NewCorpus);
ResetCoverage();
}
for (const auto &U : Corpus) {
for (const auto &U : *InitialCorpus) {
bool NewCoverage = RunOne(U);
if (!Options.PruneCorpus || NewCoverage) {
NewCorpus.push_back(U);
Corpus.push_back(U);
if (Options.Verbosity >= 2)
Printf("NEW0: %zd L %zd\n", MaxCoverage.BlockCoverage, U.size());
}
TryDetectingAMemoryLeak(U.data(), U.size(),
/*DuringInitialCorpusExecution*/ true);
}
Corpus = NewCorpus;
UpdateCorpusDistribution();
for (auto &X : Corpus)
UnitHashesAddedToCorpus.insert(Hash(X));
PrintStats("INITED");
if (Corpus.empty()) {
Printf("ERROR: no interesting inputs were found. "
@ -540,16 +506,6 @@ void Fuzzer::WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix) {
Printf("Base64: %s\n", Base64(U).c_str());
}
// Writes every unit of the in-memory corpus into the Options.OutputCorpus
// directory, using the path DirPlusFile builds from the directory and the
// unit's Hash(). Does nothing when no output corpus directory is set.
void Fuzzer::SaveCorpus() {
  const std::string &OutDir = Options.OutputCorpus;
  if (!OutDir.empty()) {
    for (const auto &Item : Corpus)
      WriteToFile(Item, DirPlusFile(OutDir, Hash(Item)));
    if (Options.Verbosity)
      Printf("Written corpus of %zd files to %s\n", Corpus.size(),
             OutDir.c_str());
  }
}
void Fuzzer::PrintStatusForNewUnit(const Unit &U) {
if (!Options.PrintNEW)
return;
@ -584,7 +540,6 @@ void Fuzzer::PrintNewPCs() {
void Fuzzer::ReportNewCoverage(const Unit &U) {
Corpus.push_back(U);
UpdateCorpusDistribution();
UnitHashesAddedToCorpus.insert(Hash(U));
MD.RecordSuccessfulMutationSequence();
PrintStatusForNewUnit(U);
WriteToOutputCorpus(U);
@ -599,6 +554,7 @@ void Fuzzer::ReportNewCoverage(const Unit &U) {
UnitVector Fuzzer::FindExtraUnits(const UnitVector &Initial,
const UnitVector &Extra) {
UnitVector Res = Extra;
UnitVector Tmp;
size_t OldSize = Res.size();
for (int Iter = 0; Iter < 10; Iter++) {
ShuffleCorpus(&Res);
@ -607,18 +563,18 @@ UnitVector Fuzzer::FindExtraUnits(const UnitVector &Initial,
for (auto &U : Initial)
RunOne(U);
Corpus.clear();
Tmp.clear();
for (auto &U : Res)
if (RunOne(U))
Corpus.push_back(U);
Tmp.push_back(U);
char Stat[7] = "MIN ";
Stat[3] = '0' + Iter;
PrintStats(Stat);
size_t NewSize = Corpus.size();
size_t NewSize = Tmp.size();
assert(NewSize <= OldSize);
Res.swap(Corpus);
Res.swap(Tmp);
if (NewSize + 5 >= OldSize)
break;
@ -736,54 +692,6 @@ void Fuzzer::ResetCoverage() {
PrepareCounters(&MaxCoverage);
}
// Experimental search heuristic: drilling.
// - Read, shuffle, execute and minimize the corpus.
// - Choose one random unit.
// - Reset the coverage.
// - Start fuzzing as if the chosen unit was the only element of the corpus.
// - When done, reset the coverage again.
// - Merge the newly created corpus into the original one.
//
// Precondition (asserted): Corpus is non-empty.
// Side effects visible below: Options.PrintNEW is forced to false for the
// drilling phase and then set to true (not restored to its previous value);
// Options.OutputCorpus is emptied during Loop() and put back afterwards.
void Fuzzer::Drill() {
// The corpus is already read, shuffled, and minimized.
assert(!Corpus.empty());
Options.PrintNEW = false; // Don't print NEW status lines when drilling.
// Take a copy of the chosen unit: Corpus is swapped away just below, so a
// reference into it would not refer to the live corpus any more.
Unit U = ChooseUnitToMutate();
ResetCoverage();
// Park the full corpus in SavedCorpus; Corpus becomes empty and is then
// seeded with the single chosen unit.
std::vector<Unit> SavedCorpus;
SavedCorpus.swap(Corpus);
Corpus.push_back(U);
UpdateCorpusDistribution();
assert(Corpus.size() == 1);
// Execute the seed once after the coverage reset, before the main loop.
RunOne(U);
PrintStats("DRILL ");
std::string SavedOutputCorpusPath; // Don't write new units while drilling.
SavedOutputCorpusPath.swap(Options.OutputCorpus);
Loop();
// Drilling is done: start again from clean coverage and put the output
// corpus path back.
ResetCoverage();
PrintStats("REINIT");
SavedOutputCorpusPath.swap(Options.OutputCorpus);
// Re-run the original corpus first (results ignored), so that the merge loop
// below runs the drilled units after the original ones.
for (auto &U : SavedCorpus)
RunOne(U);
PrintStats("MERGE ");
Options.PrintNEW = true;
size_t NumMerged = 0;
// Corpus now holds what the drilling loop left in it. Units for which RunOne()
// returns true are reported, counted and written to the output corpus.
// NOTE(review): this loop variable shadows the outer `U` declared above.
for (auto &U : Corpus) {
if (RunOne(U)) {
PrintStatusForNewUnit(U);
NumMerged++;
WriteToOutputCorpus(U);
}
}
PrintStats("MERGED");
if (NumMerged && Options.Verbosity)
Printf("Drilling discovered %zd new units\n", NumMerged);
}
void Fuzzer::Loop() {
system_clock::time_point LastCorpusReload = system_clock::now();
if (Options.DoCrossOver)

View File

@ -5,6 +5,9 @@ RUN: echo b > %t/SUB1/SUB2/b
RUN: echo c > %t/SUB1/SUB2/SUB3/c
RUN: LLVMFuzzer-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=SUBDIRS
SUBDIRS: READ units: 3
RUN: echo zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz > %t/SUB1/long
RUN: LLVMFuzzer-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=LONG
LONG: INFO: -max_len is not provided, using 94
RUN: rm -rf %t/SUB1
RUN: not LLVMFuzzer-SimpleTest NONEXISTENT_DIR 2>&1 | FileCheck %s --check-prefix=NONEXISTENT_DIR

View File

@ -1,8 +0,0 @@
CHECK: BINGO
RUN: rm -rf FourIndependentBranchesTestCORPUS
RUN: mkdir FourIndependentBranchesTestCORPUS
RUN: LLVMFuzzer-FourIndependentBranchesTest -seed=1 -runs=100000 FourIndependentBranchesTestCORPUS
RUN: not LLVMFuzzer-FourIndependentBranchesTest -runs=100000 -drill=1 -jobs=200 FourIndependentBranchesTestCORPUS 2>&1 | FileCheck %s
RUN: rm -rf FourIndependentBranchesTestCORPUS

View File

@ -1,8 +0,0 @@
# Test truncate_units option.
RUN: rm -rf FuzzerTruncateTestCORPUS
RUN: mkdir FuzzerTruncateTestCORPUS
RUN: echo "01234567890123456789012345678901234567890" > FuzzerTruncateTestCORPUS/unit1
RUN: LLVMFuzzer-EmptyTest -seed=1 -runs=0 -truncate_units=1 ./FuzzerTruncateTestCORPUS 2>&1 | FileCheck %s
RUN: rm -rf FuzzerTruncateTestCORPUS
CHECK: INFO TRUNC 1 units added