[libFuzzer] Add an option to keep initial seed inputs around.

This patch adds an option "keep_seed" to keep all initial seed inputs in the
corpus. Previously, only the initial seed inputs that find new coverage were
added to the corpus, and all the other initial inputs were discarded. We
observed in some circumstances that useful initial seed inputs are discarded as
they find no new coverage, even though they contain useful fragments in them
(e.g., SQLITE3 FuzzBench benchmark). This newly added option provides a way to
keep seed inputs in the corpus for those circumstances. With this patch, and
with -keep_seed=1, all initial seed inputs are kept in the corpus regardless of
whether they find new coverage or not. Further, these seed inputs are not
replaced with smaller inputs even if -reduce_inputs=1.

Differential Revision: https://reviews.llvm.org/D86577
This commit is contained in:
Dokyung Song 2020-07-31 00:07:20 +00:00
parent 7bc9924cb2
commit 62673c430d
10 changed files with 90 additions and 17 deletions

View File

@ -33,6 +33,7 @@ struct InputInfo {
// Stats.
size_t NumExecutedMutations = 0;
size_t NumSuccessfullMutations = 0;
bool NeverReduce = false;
bool MayDeleteFile = false;
bool Reduced = false;
bool HasFocusFunction = false;
@ -177,7 +178,7 @@ public:
bool empty() const { return Inputs.empty(); }
const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
InputInfo *AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile,
bool HasFocusFunction,
bool HasFocusFunction, bool NeverReduce,
const Vector<uint32_t> &FeatureSet,
const DataFlowTrace &DFT, const InputInfo *BaseII) {
assert(!U.empty());
@ -187,6 +188,7 @@ public:
InputInfo &II = *Inputs.back();
II.U = U;
II.NumFeatures = NumFeatures;
II.NeverReduce = NeverReduce;
II.MayDeleteFile = MayDeleteFile;
II.UniqFeatureSet = FeatureSet;
II.HasFocusFunction = HasFocusFunction;

View File

@ -671,6 +671,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
Options.Verbosity = Flags.verbosity;
Options.MaxLen = Flags.max_len;
Options.LenControl = Flags.len_control;
Options.KeepSeed = Flags.keep_seed;
Options.UnitTimeoutSec = Flags.timeout;
Options.ErrorExitCode = Flags.error_exitcode;
Options.TimeoutExitCode = Flags.timeout_exitcode;

View File

@ -23,6 +23,10 @@ FUZZER_FLAG_INT(len_control, 100, "Try generating small inputs first, "
FUZZER_FLAG_STRING(seed_inputs, "A comma-separated list of input files "
"to use as an additional seed corpus. Alternatively, an \"@\" followed by "
"the name of a file containing the comma-separated list.")
// keep_seed: retain every initial seed input in the corpus, whether or not it
// adds coverage. Adjacent string literals are concatenated verbatim, so each
// piece must carry its own trailing space.
FUZZER_FLAG_INT(keep_seed, 0, "If 1, keep seed inputs in the corpus even if "
"they do not produce new coverage. When used with |reduce_inputs==1|, the "
"seed inputs will never be reduced. This option can be useful when seeds are "
"not properly formed for the fuzz target but still have useful snippets.")
FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.")
FUZZER_FLAG_INT(mutate_depth, 5,
"Apply this number of consecutive mutations to each input.")

View File

@ -309,11 +309,15 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
else
Env.MainCorpusDir = CorpusDirs[0];
auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features,
{}, &Env.Cov,
CFPath, false);
RemoveFile(CFPath);
if (Options.KeepSeed) {
for (auto &File : SeedFiles)
Env.Files.push_back(File.File);
} else {
auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features,
{}, &Env.Cov, CFPath, false);
RemoveFile(CFPath);
}
Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
Env.Files.size(), Env.TempDir.c_str());

View File

@ -67,7 +67,8 @@ public:
void ExecuteCallback(const uint8_t *Data, size_t Size);
bool RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile = false,
InputInfo *II = nullptr, bool *FoundUniqFeatures = nullptr);
InputInfo *II = nullptr, bool ForceAddToCorpus = false,
bool *FoundUniqFeatures = nullptr);
// Merge Corpora[1:] into Corpora[0].
void Merge(const Vector<std::string> &Corpora);

View File

@ -464,7 +464,8 @@ static void RenameFeatureSetFile(const std::string &FeaturesDir,
}
bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
InputInfo *II, bool *FoundUniqFeatures) {
InputInfo *II, bool ForceAddToCorpus,
bool *FoundUniqFeatures) {
if (!Size)
return false;
@ -478,7 +479,7 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
UniqFeatureSetTmp.push_back(Feature);
if (Options.Entropic)
Corpus.UpdateFeatureFrequency(II, Feature);
if (Options.ReduceInputs && II)
if (Options.ReduceInputs && II && !II->NeverReduce)
if (std::binary_search(II->UniqFeatureSet.begin(),
II->UniqFeatureSet.end(), Feature))
FoundUniqFeaturesOfII++;
@ -487,11 +488,12 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
*FoundUniqFeatures = FoundUniqFeaturesOfII;
PrintPulseAndReportSlowInput(Data, Size);
size_t NumNewFeatures = Corpus.NumFeatureUpdates() - NumUpdatesBefore;
if (NumNewFeatures) {
if (NumNewFeatures || ForceAddToCorpus) {
TPC.UpdateObservedPCs();
auto NewII = Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures,
MayDeleteFile, TPC.ObservedFocusFunction(),
UniqFeatureSetTmp, DFT, II);
auto NewII =
Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile,
TPC.ObservedFocusFunction(), ForceAddToCorpus,
UniqFeatureSetTmp, DFT, II);
WriteFeatureSetToFile(Options.FeaturesDir, Sha1ToString(NewII->Sha1),
NewII->UniqFeatureSet);
return true;
@ -700,7 +702,7 @@ void Fuzzer::MutateAndTestOne() {
bool FoundUniqFeatures = false;
bool NewCov = RunOne(CurrentUnitData, Size, /*MayDeleteFile=*/true, &II,
&FoundUniqFeatures);
/*ForceAddToCorpus*/ false, &FoundUniqFeatures);
TryDetectingAMemoryLeak(CurrentUnitData, Size,
/*DuringInitialCorpusExecution*/ false);
if (NewCov) {
@ -768,7 +770,9 @@ void Fuzzer::ReadAndExecuteSeedCorpora(Vector<SizedFile> &CorporaFiles) {
for (auto &SF : CorporaFiles) {
auto U = FileToVector(SF.File, MaxInputLen, /*ExitOnError=*/false);
assert(U.size() <= MaxInputLen);
RunOne(U.data(), U.size());
RunOne(U.data(), U.size(), /*MayDeleteFile*/ false, /*II*/ nullptr,
/*ForceAddToCorpus*/ Options.KeepSeed,
/*FoundUniqFeatures*/ nullptr);
CheckExitOnSrcPosOrItem();
TryDetectingAMemoryLeak(U.data(), U.size(),
/*DuringInitialCorpusExecution*/ true);

View File

@ -18,6 +18,7 @@ struct FuzzingOptions {
int Verbosity = 1;
size_t MaxLen = 0;
size_t LenControl = 1000;
bool KeepSeed = false;
int UnitTimeoutSec = 300;
int TimeoutExitCode = 70;
int OOMExitCode = 71;

View File

@ -597,8 +597,10 @@ TEST(Corpus, Distribution) {
size_t N = 10;
size_t TriesPerUnit = 1<<16;
for (size_t i = 0; i < N; i++)
C->AddToCorpus(Unit{static_cast<uint8_t>(i)}, 1, false, false, {}, DFT,
nullptr);
C->AddToCorpus(Unit{static_cast<uint8_t>(i)}, /*NumFeatures*/ 1,
/*MayDeleteFile*/ false, /*HasFocusFunction*/ false,
/*NeverReduce*/ false, /*FeatureSet*/ {}, DFT,
/*BaseII*/ nullptr);
Vector<size_t> Hist(N);
for (size_t i = 0; i < N * TriesPerUnit; i++) {

View File

@ -0,0 +1,37 @@
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Test whether the fuzzer can find "SELECT FROM WHERE", given a seed input
// "SELECTxFROMxWHERE". Without -keep_seed=1, it takes longer to find the
// desired string, because the seed input is more likely to be reduced to a
// prefix of the given input first, losing useful fragments towards the end
// of the seed input.
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
static volatile int Sink = 0;
// Fuzz target entry point. Prints a BINGO message to stderr and terminates
// the process with exit code 1 when the input is exactly the 17 bytes
// "SELECT FROM WHERE"; returns 0 for every other input.
//
// The keywords and separators are matched in successive stages, each one
// bailing out as soon as a byte differs.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
  // Anything longer than the 17-byte target string is rejected outright.
  if (Size > 17)
    return 0;

  // Stage 1: the "SELECT" keyword at offsets 0..5.
  if (!(Size >= 6 && Data[0] == 'S' && Data[1] == 'E' && Data[2] == 'L' &&
        Data[3] == 'E' && Data[4] == 'C' && Data[5] == 'T'))
    return 0;

  // Stage 2: a single space separator at offset 6.
  if (!(Size >= 7 && Data[6] == ' '))
    return 0;

  // Stage 3: the "FROM" keyword at offsets 7..10.
  if (!(Size >= 11 && Data[7] == 'F' && Data[8] == 'R' && Data[9] == 'O' &&
        Data[10] == 'M'))
    return 0;

  // Stage 4: a single space separator at offset 11.
  if (!(Size >= 12 && Data[11] == ' '))
    return 0;

  // Stage 5: the "WHERE" keyword at offsets 12..16.
  if (!(Size >= 17 && Data[12] == 'W' && Data[13] == 'H' && Data[14] == 'E' &&
        Data[15] == 'R' && Data[16] == 'E'))
    return 0;

  // Full match: report it and stop the process with a non-zero exit code.
  fprintf(stderr, "BINGO; Found the target, exiting.\n");
  exit(1);
}

View File

@ -0,0 +1,17 @@
REQUIRES: linux, x86_64
RUN: %cpp_compiler %S/KeepSeedTest.cpp -o %t-KeepSeedTest
RUN: rm -rf %t-corpus
RUN: mkdir %t-corpus
RUN: echo -n SELECTxFROMxWHERE > %t-corpus/valid-fragments
RUN: not %run %t-KeepSeedTest -keep_seed=1 -seed=1 -runs=2000000 %t-corpus 2>&1 | FileCheck %s
CHECK: BINGO
RUN: rm -rf %t-corpus-baseline
RUN: mkdir %t-corpus-baseline
RUN: echo -n SELECTxFROMxWHERE > %t-corpus-baseline/valid-fragments
# The following checks that, without -keep_seed=1, libFuzzer does not find the
# crashing input "SELECT FROM WHERE" even with 2x more runs.
RUN: %run %t-KeepSeedTest -seed=1 -runs=4000000 %t-corpus-baseline -print_final_stats=1