diff --git a/llvm/lib/Fuzzer/FuzzerDriver.cpp b/llvm/lib/Fuzzer/FuzzerDriver.cpp index f7f65e056177..054d4c5d32f1 100644 --- a/llvm/lib/Fuzzer/FuzzerDriver.cpp +++ b/llvm/lib/Fuzzer/FuzzerDriver.cpp @@ -426,7 +426,6 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Options.PrintNewCovPcs = Flags.print_pcs; Options.PrintFinalStats = Flags.print_final_stats; Options.PrintCoverage = Flags.print_coverage; - Options.TruncateUnits = Flags.truncate_units; Options.PruneCorpus = Flags.prune_corpus; if (Flags.use_value_profile) @@ -495,25 +494,26 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { size_t TemporaryMaxLen = Options.MaxLen ? Options.MaxLen : kMaxSaneLen; - F.RereadOutputCorpus(TemporaryMaxLen); - for (auto &inp : *Inputs) - if (inp != Options.OutputCorpus) - F.ReadDir(inp, nullptr, TemporaryMaxLen); + UnitVector InitialCorpus; + for (auto &Inp : *Inputs) { + Printf("Loading corpus dir: %s\n", Inp.c_str()); + ReadDirToVectorOfUnits(Inp.c_str(), &InitialCorpus, nullptr, TemporaryMaxLen); + } - if (Options.MaxLen == 0) - F.SetMaxLen( - std::min(std::max(kMinDefaultLen, F.MaxUnitSizeInCorpus()), kMaxSaneLen)); + if (Options.MaxLen == 0) { + size_t MaxLen = 0; + for (auto &U : InitialCorpus) + MaxLen = std::max(U.size(), MaxLen); + F.SetMaxLen(std::min(std::max(kMinDefaultLen, MaxLen), kMaxSaneLen)); + } - if (F.CorpusSize() == 0) { - F.AddToCorpus(Unit()); // Can't fuzz empty corpus, so add an empty input. + if (InitialCorpus.empty()) { + InitialCorpus.push_back(Unit()); if (Options.Verbosity) Printf("INFO: A corpus is not provided, starting from an empty corpus\n"); } - F.ShuffleAndMinimize(); - if (Flags.drill) - F.Drill(); - else - F.Loop(); + F.ShuffleAndMinimize(&InitialCorpus); + F.Loop(); if (Flags.verbosity) Printf("Done %d runs in %zd second(s)\n", F.getTotalNumberOfRuns(), diff --git a/llvm/lib/Fuzzer/FuzzerFlags.def b/llvm/lib/Fuzzer/FuzzerFlags.def index c4de9974b51f..fe488228a34d 100644 --- a/llvm/lib/Fuzzer/FuzzerFlags.def +++ b/llvm/lib/Fuzzer/FuzzerFlags.def @@ -71,8 +71,6 @@ FUZZER_FLAG_STRING(exact_artifact_path, "as $(exact_artifact_path). This overrides -artifact_prefix " "and will not use checksum in the file name. Do not " "use the same path for several parallel processes.") -FUZZER_FLAG_INT(drill, 0, "Experimental: fuzz using a single unit as the seed " - "corpus, then merge with the initial corpus") FUZZER_FLAG_INT(output_csv, 0, "Enable pulse output in CSV format.") FUZZER_FLAG_INT(print_pcs, 0, "If 1, print out newly covered PCs.") FUZZER_FLAG_INT(print_final_stats, 0, "If 1, print statistics at exit.") @@ -92,7 +90,6 @@ FUZZER_FLAG_INT(detect_leaks, 1, "If 1, and if LeakSanitizer is enabled " "try to detect memory leaks during fuzzing (i.e. not only at shut down).") FUZZER_FLAG_INT(rss_limit_mb, 2048, "If non-zero, the fuzzer will exit upon" "reaching this limit of RSS memory usage.") -FUZZER_FLAG_INT(truncate_units, 0, "Try truncated units when loading corpus.") FUZZER_FLAG_INT(prune_corpus, 1, "Prune corpus items without new coverage when " "loading corpus.") @@ -101,3 +98,5 @@ FUZZER_DEPRECATED_FLAG(save_minimized_corpus) FUZZER_DEPRECATED_FLAG(sync_command) FUZZER_DEPRECATED_FLAG(sync_timeout) FUZZER_DEPRECATED_FLAG(test_single_input) +FUZZER_DEPRECATED_FLAG(drill) +FUZZER_DEPRECATED_FLAG(truncate_units) diff --git a/llvm/lib/Fuzzer/FuzzerIO.cpp b/llvm/lib/Fuzzer/FuzzerIO.cpp index 0e0c4e989cc4..5b394b641a3a 100644 --- a/llvm/lib/Fuzzer/FuzzerIO.cpp +++ b/llvm/lib/Fuzzer/FuzzerIO.cpp @@ -30,7 +30,7 @@ bool IsFile(const std::string &Path) { return S_ISREG(St.st_mode); } -static long GetEpoch(const std::string &Path) { +long GetEpoch(const std::string &Path) { struct stat St; if (stat(Path.c_str(), &St)) return 0; // Can't stat, be conservative. diff --git a/llvm/lib/Fuzzer/FuzzerInternal.h b/llvm/lib/Fuzzer/FuzzerInternal.h index e4ac4413b635..1035c049dc7a 100644 --- a/llvm/lib/Fuzzer/FuzzerInternal.h +++ b/llvm/lib/Fuzzer/FuzzerInternal.h @@ -89,6 +89,7 @@ private: typedef FixedWord<27> Word; // 28 bytes. bool IsFile(const std::string &Path); +long GetEpoch(const std::string &Path); std::string FileToString(const std::string &Path); Unit FileToVector(const std::string &Path, size_t MaxSize = 0); void ReadDirToVectorOfUnits(const char *Path, std::vector *V, @@ -246,10 +247,44 @@ struct FuzzingOptions { bool PrintFinalStats = false; bool PrintCoverage = false; bool DetectLeaks = true; - bool TruncateUnits = false; bool PruneCorpus = true; }; +struct InputInfo { + Unit U; // The actual input data. +}; + +class InputCorpus { + public: + InputCorpus() { + Corpus.reserve(1 << 14); // Avoid too many resizes. + } + size_t size() const { return Corpus.size(); } + bool empty() const { return Corpus.empty(); } + const Unit &operator[] (size_t Idx) const { return Corpus[Idx].U; } + void Append(const std::vector &V) { + for (auto &U : V) + push_back(U); + } + void push_back(const Unit &U) { + auto H = Hash(U); + if (!Hashes.insert(H).second) return; + InputInfo II; + II.U = U; + Corpus.push_back(II); + } + + typedef const std::vector::const_iterator ConstIter; + ConstIter begin() const { return Corpus.begin(); } + ConstIter end() const { return Corpus.end(); } + + bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); } + + private: + std::unordered_set Hashes; + std::vector Corpus; +}; + class MutationDispatcher { public: MutationDispatcher(Random &Rand, const FuzzingOptions &Options); @@ -316,7 +351,7 @@ public: void ClearAutoDictionary(); void PrintRecommendedDictionary(); - void SetCorpus(const std::vector *Corpus) { this->Corpus = Corpus; } + void SetCorpus(const InputCorpus *Corpus) { this->Corpus = Corpus; } Random &GetRand() { return Rand; } @@ -350,7 +385,7 @@ private: Dictionary PersistentAutoDictionary; std::vector CurrentMutatorSequence; std::vector CurrentDictionaryEntrySequence; - const std::vector *Corpus = nullptr; + const InputCorpus *Corpus = nullptr; std::vector MutateInPlaceHere; std::vector Mutators; @@ -458,21 +493,13 @@ public: } size_t ChooseUnitIdxToMutate(); const Unit &ChooseUnitToMutate() { return Corpus[ChooseUnitIdxToMutate()]; }; - void TruncateUnits(std::vector *NewCorpus); void Loop(); - void Drill(); - void ShuffleAndMinimize(); + void ShuffleAndMinimize(UnitVector *V); void InitializeTraceState(); void AssignTaintLabels(uint8_t *Data, size_t Size); size_t CorpusSize() const { return Corpus.size(); } - size_t MaxUnitSizeInCorpus() const; - void ReadDir(const std::string &Path, long *Epoch, size_t MaxSize) { - Printf("Loading corpus: %s\n", Path.c_str()); - ReadDirToVectorOfUnits(Path.c_str(), &Corpus, Epoch, MaxSize); - } + void ReadDir(const std::string &Path, long *Epoch, size_t MaxSize); void RereadOutputCorpus(size_t MaxSize); - // Save the current corpus to OutputCorpus. - void SaveCorpus(); size_t secondsSinceProcessStartUp() { return duration_cast(system_clock::now() - ProcessStartTime) @@ -561,8 +588,7 @@ private: bool HasMoreMallocsThanFrees = false; size_t NumberOfLeakDetectionAttempts = 0; - std::vector Corpus; - std::unordered_set UnitHashesAddedToCorpus; + InputCorpus Corpus; std::piecewise_constant_distribution CorpusDistribution; UserCallback CB; diff --git a/llvm/lib/Fuzzer/FuzzerLoop.cpp b/llvm/lib/Fuzzer/FuzzerLoop.cpp index 3ef7c5f68aed..2dcf11eb36d3 100644 --- a/llvm/lib/Fuzzer/FuzzerLoop.cpp +++ b/llvm/lib/Fuzzer/FuzzerLoop.cpp @@ -172,6 +172,8 @@ Fuzzer::Fuzzer(UserCallback CB, MutationDispatcher &MD, FuzzingOptions Options) } if (Options.Verbosity) TPC.PrintModuleInfo(); + if (!Options.OutputCorpus.empty() && Options.Reload) + EpochOfLastReadOfOutputCorpus = GetEpoch(Options.OutputCorpus); } Fuzzer::~Fuzzer() { } @@ -340,13 +342,6 @@ void Fuzzer::PrintFinalStats() { Printf("stat::peak_rss_mb: %zd\n", GetPeakRSSMb()); } -size_t Fuzzer::MaxUnitSizeInCorpus() const { - size_t Res = 0; - for (auto &X : Corpus) - Res = std::max(Res, X.size()); - return Res; -} - void Fuzzer::SetMaxLen(size_t MaxLen) { assert(Options.MaxLen == 0); // Can only reset MaxLen from 0 to non-0. assert(MaxLen); @@ -354,25 +349,25 @@ void Fuzzer::SetMaxLen(size_t MaxLen) { Printf("INFO: -max_len is not provided, using %zd\n", Options.MaxLen); } +void Fuzzer::ReadDir(const std::string &Path, long *Epoch, size_t MaxSize) { + Printf("Loading corpus: %s\n", Path.c_str()); + std::vector V; + ReadDirToVectorOfUnits(Path.c_str(), &V, Epoch, MaxSize); + for (auto &U : V) + Corpus.push_back(U); +} void Fuzzer::RereadOutputCorpus(size_t MaxSize) { - if (Options.OutputCorpus.empty()) - return; + if (Options.OutputCorpus.empty() || !Options.Reload) return; std::vector AdditionalCorpus; ReadDirToVectorOfUnits(Options.OutputCorpus.c_str(), &AdditionalCorpus, &EpochOfLastReadOfOutputCorpus, MaxSize); - if (Corpus.empty()) { - Corpus = AdditionalCorpus; - return; - } - if (!Options.Reload) - return; if (Options.Verbosity >= 2) Printf("Reload: read %zd new units.\n", AdditionalCorpus.size()); for (auto &X : AdditionalCorpus) { if (X.size() > MaxSize) X.resize(MaxSize); - if (UnitHashesAddedToCorpus.insert(Hash(X)).second) { + if (!Corpus.HasUnit(X)) { if (RunOne(X)) { Corpus.push_back(X); UpdateCorpusDistribution(); @@ -390,51 +385,22 @@ void Fuzzer::ShuffleCorpus(UnitVector *V) { }); } -// Tries random prefixes of corpus items. -void Fuzzer::TruncateUnits(std::vector *NewCorpus) { - std::vector Fractions = {0.25, 0.5, 0.75, 1.0}; - - size_t TruncInputs = 0; - for (double Fraction : Fractions) { - for (const auto &U : Corpus) { - uint64_t S = MD.GetRand()(U.size() * Fraction); - if (!S || !RunOne(U.data(), S)) - continue; - TruncInputs++; - Unit U1(U.begin(), U.begin() + S); - NewCorpus->push_back(U1); - } - } - if (TruncInputs) - Printf("\tINFO TRUNC %zd units added to in-memory corpus\n", TruncInputs); -} - -void Fuzzer::ShuffleAndMinimize() { - PrintStats("READ "); - std::vector NewCorpus; +void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) { + Printf("#0\tREAD units: %zd\n", InitialCorpus->size()); if (Options.ShuffleAtStartUp) - ShuffleCorpus(&Corpus); + ShuffleCorpus(InitialCorpus); - if (Options.TruncateUnits) { - ResetCoverage(); - TruncateUnits(&NewCorpus); - ResetCoverage(); - } - - for (const auto &U : Corpus) { + for (const auto &U : *InitialCorpus) { bool NewCoverage = RunOne(U); if (!Options.PruneCorpus || NewCoverage) { - NewCorpus.push_back(U); + Corpus.push_back(U); if (Options.Verbosity >= 2) Printf("NEW0: %zd L %zd\n", MaxCoverage.BlockCoverage, U.size()); } TryDetectingAMemoryLeak(U.data(), U.size(), /*DuringInitialCorpusExecution*/ true); } - Corpus = NewCorpus; UpdateCorpusDistribution(); - for (auto &X : Corpus) - UnitHashesAddedToCorpus.insert(Hash(X)); PrintStats("INITED"); if (Corpus.empty()) { Printf("ERROR: no interesting inputs were found. " @@ -540,16 +506,6 @@ void Fuzzer::WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix) { Printf("Base64: %s\n", Base64(U).c_str()); } -void Fuzzer::SaveCorpus() { - if (Options.OutputCorpus.empty()) - return; - for (const auto &U : Corpus) - WriteToFile(U, DirPlusFile(Options.OutputCorpus, Hash(U))); - if (Options.Verbosity) - Printf("Written corpus of %zd files to %s\n", Corpus.size(), - Options.OutputCorpus.c_str()); -} - void Fuzzer::PrintStatusForNewUnit(const Unit &U) { if (!Options.PrintNEW) return; @@ -584,7 +540,6 @@ void Fuzzer::PrintNewPCs() { void Fuzzer::ReportNewCoverage(const Unit &U) { Corpus.push_back(U); UpdateCorpusDistribution(); - UnitHashesAddedToCorpus.insert(Hash(U)); MD.RecordSuccessfulMutationSequence(); PrintStatusForNewUnit(U); WriteToOutputCorpus(U); @@ -599,6 +554,7 @@ void Fuzzer::ReportNewCoverage(const Unit &U) { UnitVector Fuzzer::FindExtraUnits(const UnitVector &Initial, const UnitVector &Extra) { UnitVector Res = Extra; + UnitVector Tmp; size_t OldSize = Res.size(); for (int Iter = 0; Iter < 10; Iter++) { ShuffleCorpus(&Res); @@ -607,18 +563,18 @@ UnitVector Fuzzer::FindExtraUnits(const UnitVector &Initial, for (auto &U : Initial) RunOne(U); - Corpus.clear(); + Tmp.clear(); for (auto &U : Res) if (RunOne(U)) - Corpus.push_back(U); + Tmp.push_back(U); char Stat[7] = "MIN "; Stat[3] = '0' + Iter; PrintStats(Stat); - size_t NewSize = Corpus.size(); + size_t NewSize = Tmp.size(); assert(NewSize <= OldSize); - Res.swap(Corpus); + Res.swap(Tmp); if (NewSize + 5 >= OldSize) break; @@ -736,54 +692,6 @@ void Fuzzer::ResetCoverage() { PrepareCounters(&MaxCoverage); } -// Experimental search heuristic: drilling. -// - Read, shuffle, execute and minimize the corpus. -// - Choose one random unit. -// - Reset the coverage. -// - Start fuzzing as if the chosen unit was the only element of the corpus. -// - When done, reset the coverage again. -// - Merge the newly created corpus into the original one. -void Fuzzer::Drill() { - // The corpus is already read, shuffled, and minimized. - assert(!Corpus.empty()); - Options.PrintNEW = false; // Don't print NEW status lines when drilling. - - Unit U = ChooseUnitToMutate(); - - ResetCoverage(); - - std::vector SavedCorpus; - SavedCorpus.swap(Corpus); - Corpus.push_back(U); - UpdateCorpusDistribution(); - assert(Corpus.size() == 1); - RunOne(U); - PrintStats("DRILL "); - std::string SavedOutputCorpusPath; // Don't write new units while drilling. - SavedOutputCorpusPath.swap(Options.OutputCorpus); - Loop(); - - ResetCoverage(); - - PrintStats("REINIT"); - SavedOutputCorpusPath.swap(Options.OutputCorpus); - for (auto &U : SavedCorpus) - RunOne(U); - PrintStats("MERGE "); - Options.PrintNEW = true; - size_t NumMerged = 0; - for (auto &U : Corpus) { - if (RunOne(U)) { - PrintStatusForNewUnit(U); - NumMerged++; - WriteToOutputCorpus(U); - } - } - PrintStats("MERGED"); - if (NumMerged && Options.Verbosity) - Printf("Drilling discovered %zd new units\n", NumMerged); -} - void Fuzzer::Loop() { system_clock::time_point LastCorpusReload = system_clock::now(); if (Options.DoCrossOver) diff --git a/llvm/lib/Fuzzer/test/fuzzer-dirs.test b/llvm/lib/Fuzzer/test/fuzzer-dirs.test index 3eaaf6b6bb5b..63afe8dfcf9c 100644 --- a/llvm/lib/Fuzzer/test/fuzzer-dirs.test +++ b/llvm/lib/Fuzzer/test/fuzzer-dirs.test @@ -5,6 +5,9 @@ RUN: echo b > %t/SUB1/SUB2/b RUN: echo c > %t/SUB1/SUB2/SUB3/c RUN: LLVMFuzzer-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=SUBDIRS SUBDIRS: READ units: 3 +RUN: echo zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz > %t/SUB1/long +RUN: LLVMFuzzer-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=LONG +LONG: INFO: -max_len is not provided, using 94 RUN: rm -rf %t/SUB1 RUN: not LLVMFuzzer-SimpleTest NONEXISTENT_DIR 2>&1 | FileCheck %s --check-prefix=NONEXISTENT_DIR diff --git a/llvm/lib/Fuzzer/test/fuzzer-drill.test b/llvm/lib/Fuzzer/test/fuzzer-drill.test deleted file mode 100644 index b2fc1fecd276..000000000000 --- a/llvm/lib/Fuzzer/test/fuzzer-drill.test +++ /dev/null @@ -1,8 +0,0 @@ -CHECK: BINGO -RUN: rm -rf FourIndependentBranchesTestCORPUS -RUN: mkdir FourIndependentBranchesTestCORPUS -RUN: LLVMFuzzer-FourIndependentBranchesTest -seed=1 -runs=100000 FourIndependentBranchesTestCORPUS -RUN: not LLVMFuzzer-FourIndependentBranchesTest -runs=100000 -drill=1 -jobs=200 FourIndependentBranchesTestCORPUS 2>&1 | FileCheck %s -RUN: rm -rf FourIndependentBranchesTestCORPUS - - diff --git a/llvm/lib/Fuzzer/test/fuzzer-trunc.test b/llvm/lib/Fuzzer/test/fuzzer-trunc.test deleted file mode 100644 index a25715aa9765..000000000000 --- a/llvm/lib/Fuzzer/test/fuzzer-trunc.test +++ /dev/null @@ -1,8 +0,0 @@ -# Test truncate_units option. -RUN: rm -rf FuzzerTruncateTestCORPUS -RUN: mkdir FuzzerTruncateTestCORPUS -RUN: echo "01234567890123456789012345678901234567890" > FuzzerTruncateTestCORPUS/unit1 -RUN: LLVMFuzzer-EmptyTest -seed=1 -runs=0 -truncate_units=1 ./FuzzerTruncateTestCORPUS 2>&1 | FileCheck %s -RUN: rm -rf FuzzerTruncateTestCORPUS - -CHECK: INFO TRUNC 1 units added