forked from OSchip/llvm-project
[libFuzzer] Add an option to keep initial seed inputs around.
This patch adds an option "keep_seed" to keep all initial seed inputs in the corpus. Previously, only the initial seed inputs that find new coverage were added to the corpus, and all the other initial inputs were discarded. We observed in some circumstances that useful initial seed inputs are discarded as they find no new coverage, even though they contain useful fragments in them (e.g., SQLITE3 FuzzBench benchmark). This newly added option provides a way to keep seed inputs in the corpus for those circumstances. With this patch, and with -keep_seed=1, all initial seed inputs are kept in the corpus regardless of whether they find new coverage or not. Further, these seed inputs are not replaced with smaller inputs even if -reduce_inputs=1. Differential Revision: https://reviews.llvm.org/D86577
This commit is contained in:
parent
7bc9924cb2
commit
62673c430d
|
@ -33,6 +33,7 @@ struct InputInfo {
|
|||
// Stats.
|
||||
size_t NumExecutedMutations = 0;
|
||||
size_t NumSuccessfullMutations = 0;
|
||||
bool NeverReduce = false;
|
||||
bool MayDeleteFile = false;
|
||||
bool Reduced = false;
|
||||
bool HasFocusFunction = false;
|
||||
|
@ -177,7 +178,7 @@ public:
|
|||
bool empty() const { return Inputs.empty(); }
|
||||
const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
|
||||
InputInfo *AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile,
|
||||
bool HasFocusFunction,
|
||||
bool HasFocusFunction, bool NeverReduce,
|
||||
const Vector<uint32_t> &FeatureSet,
|
||||
const DataFlowTrace &DFT, const InputInfo *BaseII) {
|
||||
assert(!U.empty());
|
||||
|
@ -187,6 +188,7 @@ public:
|
|||
InputInfo &II = *Inputs.back();
|
||||
II.U = U;
|
||||
II.NumFeatures = NumFeatures;
|
||||
II.NeverReduce = NeverReduce;
|
||||
II.MayDeleteFile = MayDeleteFile;
|
||||
II.UniqFeatureSet = FeatureSet;
|
||||
II.HasFocusFunction = HasFocusFunction;
|
||||
|
|
|
@ -671,6 +671,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
|
|||
Options.Verbosity = Flags.verbosity;
|
||||
Options.MaxLen = Flags.max_len;
|
||||
Options.LenControl = Flags.len_control;
|
||||
Options.KeepSeed = Flags.keep_seed;
|
||||
Options.UnitTimeoutSec = Flags.timeout;
|
||||
Options.ErrorExitCode = Flags.error_exitcode;
|
||||
Options.TimeoutExitCode = Flags.timeout_exitcode;
|
||||
|
|
|
@ -23,6 +23,10 @@ FUZZER_FLAG_INT(len_control, 100, "Try generating small inputs first, "
|
|||
FUZZER_FLAG_STRING(seed_inputs, "A comma-separated list of input files "
|
||||
"to use as an additional seed corpus. Alternatively, an \"@\" followed by "
|
||||
"the name of a file containing the comma-separated list.")
|
||||
// Help text for -keep_seed. The adjacent string literals below are
// concatenated into one string, so every fragment except the last must end
// with a space to keep words from running together.
FUZZER_FLAG_INT(keep_seed, 0, "If 1, keep seed inputs in the corpus even if "
|
||||
"they do not produce new coverage. When used with |reduce_inputs==1|, the "
|
||||
"seed inputs will never be reduced. This option can be useful when seeds are "
|
||||
"not properly formed for the fuzz target but still have useful snippets.")
|
||||
FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.")
|
||||
FUZZER_FLAG_INT(mutate_depth, 5,
|
||||
"Apply this number of consecutive mutations to each input.")
|
||||
|
|
|
@ -309,11 +309,15 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
|
|||
else
|
||||
Env.MainCorpusDir = CorpusDirs[0];
|
||||
|
||||
auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
|
||||
CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features,
|
||||
{}, &Env.Cov,
|
||||
CFPath, false);
|
||||
RemoveFile(CFPath);
|
||||
if (Options.KeepSeed) {
|
||||
for (auto &File : SeedFiles)
|
||||
Env.Files.push_back(File.File);
|
||||
} else {
|
||||
auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
|
||||
CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features,
|
||||
{}, &Env.Cov, CFPath, false);
|
||||
RemoveFile(CFPath);
|
||||
}
|
||||
Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
|
||||
Env.Files.size(), Env.TempDir.c_str());
|
||||
|
||||
|
|
|
@ -67,7 +67,8 @@ public:
|
|||
|
||||
void ExecuteCallback(const uint8_t *Data, size_t Size);
|
||||
bool RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile = false,
|
||||
InputInfo *II = nullptr, bool *FoundUniqFeatures = nullptr);
|
||||
InputInfo *II = nullptr, bool ForceAddToCorpus = false,
|
||||
bool *FoundUniqFeatures = nullptr);
|
||||
|
||||
// Merge Corpora[1:] into Corpora[0].
|
||||
void Merge(const Vector<std::string> &Corpora);
|
||||
|
|
|
@ -464,7 +464,8 @@ static void RenameFeatureSetFile(const std::string &FeaturesDir,
|
|||
}
|
||||
|
||||
bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
|
||||
InputInfo *II, bool *FoundUniqFeatures) {
|
||||
InputInfo *II, bool ForceAddToCorpus,
|
||||
bool *FoundUniqFeatures) {
|
||||
if (!Size)
|
||||
return false;
|
||||
|
||||
|
@ -478,7 +479,7 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
|
|||
UniqFeatureSetTmp.push_back(Feature);
|
||||
if (Options.Entropic)
|
||||
Corpus.UpdateFeatureFrequency(II, Feature);
|
||||
if (Options.ReduceInputs && II)
|
||||
if (Options.ReduceInputs && II && !II->NeverReduce)
|
||||
if (std::binary_search(II->UniqFeatureSet.begin(),
|
||||
II->UniqFeatureSet.end(), Feature))
|
||||
FoundUniqFeaturesOfII++;
|
||||
|
@ -487,11 +488,12 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
|
|||
*FoundUniqFeatures = FoundUniqFeaturesOfII;
|
||||
PrintPulseAndReportSlowInput(Data, Size);
|
||||
size_t NumNewFeatures = Corpus.NumFeatureUpdates() - NumUpdatesBefore;
|
||||
if (NumNewFeatures) {
|
||||
if (NumNewFeatures || ForceAddToCorpus) {
|
||||
TPC.UpdateObservedPCs();
|
||||
auto NewII = Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures,
|
||||
MayDeleteFile, TPC.ObservedFocusFunction(),
|
||||
UniqFeatureSetTmp, DFT, II);
|
||||
auto NewII =
|
||||
Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile,
|
||||
TPC.ObservedFocusFunction(), ForceAddToCorpus,
|
||||
UniqFeatureSetTmp, DFT, II);
|
||||
WriteFeatureSetToFile(Options.FeaturesDir, Sha1ToString(NewII->Sha1),
|
||||
NewII->UniqFeatureSet);
|
||||
return true;
|
||||
|
@ -700,7 +702,7 @@ void Fuzzer::MutateAndTestOne() {
|
|||
|
||||
bool FoundUniqFeatures = false;
|
||||
bool NewCov = RunOne(CurrentUnitData, Size, /*MayDeleteFile=*/true, &II,
|
||||
&FoundUniqFeatures);
|
||||
/*ForceAddToCorpus*/ false, &FoundUniqFeatures);
|
||||
TryDetectingAMemoryLeak(CurrentUnitData, Size,
|
||||
/*DuringInitialCorpusExecution*/ false);
|
||||
if (NewCov) {
|
||||
|
@ -768,7 +770,9 @@ void Fuzzer::ReadAndExecuteSeedCorpora(Vector<SizedFile> &CorporaFiles) {
|
|||
for (auto &SF : CorporaFiles) {
|
||||
auto U = FileToVector(SF.File, MaxInputLen, /*ExitOnError=*/false);
|
||||
assert(U.size() <= MaxInputLen);
|
||||
RunOne(U.data(), U.size());
|
||||
RunOne(U.data(), U.size(), /*MayDeleteFile*/ false, /*II*/ nullptr,
|
||||
/*ForceAddToCorpus*/ Options.KeepSeed,
|
||||
/*FoundUniqFeatures*/ nullptr);
|
||||
CheckExitOnSrcPosOrItem();
|
||||
TryDetectingAMemoryLeak(U.data(), U.size(),
|
||||
/*DuringInitialCorpusExecution*/ true);
|
||||
|
|
|
@ -18,6 +18,7 @@ struct FuzzingOptions {
|
|||
int Verbosity = 1;
|
||||
size_t MaxLen = 0;
|
||||
size_t LenControl = 1000;
|
||||
bool KeepSeed = false;
|
||||
int UnitTimeoutSec = 300;
|
||||
int TimeoutExitCode = 70;
|
||||
int OOMExitCode = 71;
|
||||
|
|
|
@ -597,8 +597,10 @@ TEST(Corpus, Distribution) {
|
|||
size_t N = 10;
|
||||
size_t TriesPerUnit = 1<<16;
|
||||
for (size_t i = 0; i < N; i++)
|
||||
C->AddToCorpus(Unit{static_cast<uint8_t>(i)}, 1, false, false, {}, DFT,
|
||||
nullptr);
|
||||
C->AddToCorpus(Unit{static_cast<uint8_t>(i)}, /*NumFeatures*/ 1,
|
||||
/*MayDeleteFile*/ false, /*HasFocusFunction*/ false,
|
||||
/*NeverReduce*/ false, /*FeatureSet*/ {}, DFT,
|
||||
/*BaseII*/ nullptr);
|
||||
|
||||
Vector<size_t> Hist(N);
|
||||
for (size_t i = 0; i < N * TriesPerUnit; i++) {
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
// Test whether the fuzzer can find "SELECT FROM WHERE", given a seed input
|
||||
// "SELECTxFROMxWHERE". Without -keep_seed=1, it takes a longer time to
|
||||
// find the desired string, because the seed input is more likely to be reduced
|
||||
// to a prefix of the given input first, losing useful fragments towards the end
|
||||
// of the seed input.
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
static volatile int Sink = 0;
|
||||
|
||||
// Fuzz target entry point. Checks, fragment by fragment, whether Data is
// exactly the 17-byte string "SELECT FROM WHERE". On a full match it prints a
// message to stderr and terminates the process with exit(1); for every other
// input it returns 0.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
|
||||
// Inputs longer than the 17-byte target string are ignored outright.
if (Size > 17)
|
||||
return 0;
|
||||
|
||||
// Bytes 0-5: "SELECT". Each nested check re-validates Size before indexing.
if (Size >= 6 && Data[0] == 'S' && Data[1] == 'E' && Data[2] == 'L' &&
|
||||
Data[3] == 'E' && Data[4] == 'C' && Data[5] == 'T') {
|
||||
// Byte 6: separating space.
if (Size >= 7 && Data[6] == ' ') {
|
||||
// Bytes 7-10: "FROM".
if (Size >= 11 && Data[7] == 'F' && Data[8] == 'R' && Data[9] == 'O' &&
|
||||
Data[10] == 'M') {
|
||||
// Byte 11: separating space.
if (Size >= 12 && Data[11] == ' ') {
|
||||
// Bytes 12-16: "WHERE".
if (Size >= 17 && Data[12] == 'W' && Data[13] == 'H' &&
|
||||
Data[14] == 'E' && Data[15] == 'R' && Data[16] == 'E') {
|
||||
// Full match: report on stderr and exit with a non-zero status.
fprintf(stderr, "BINGO; Found the target, exiting.\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
REQUIRES: linux, x86_64
|
||||
RUN: %cpp_compiler %S/KeepSeedTest.cpp -o %t-KeepSeedTest
|
||||
|
||||
RUN: rm -rf %t-corpus
|
||||
RUN: mkdir %t-corpus
|
||||
RUN: echo -n SELECTxFROMxWHERE > %t-corpus/valid-fragments
|
||||
|
||||
RUN: not %run %t-KeepSeedTest -keep_seed=1 -seed=1 -runs=2000000 %t-corpus 2>&1 | FileCheck %s
|
||||
CHECK: BINGO
|
||||
|
||||
RUN: rm -rf %t-corpus-baseline
|
||||
RUN: mkdir %t-corpus-baseline
|
||||
RUN: echo -n SELECTxFROMxWHERE > %t-corpus-baseline/valid-fragments
|
||||
|
||||
# The following checks that, without -keep_seed=1, libFuzzer does not find the
|
||||
# crashing input "SELECT FROM WHERE" even with 2x more runs.
|
||||
RUN: %run %t-KeepSeedTest -seed=1 -runs=4000000 %t-corpus-baseline -print_final_stats=1
|
Loading…
Reference in New Issue