Redistribute energy for Corpus

The initial corpus allocation in fork mode has certain defects.
This change adds a new initial corpus allocation strategy based on grouping
seeds by size. The strategy gives more energy to the small seeds in the
corpus and increases the throughput of the test.

FuzzBench data (glibfuzzer is the run with -fork_corpus_groups=1):
https://www.fuzzbench.com/reports/experimental/2021-08-05-parallel/index.html

Reviewed By: morehouse

Differential Revision: https://reviews.llvm.org/D105084
This commit is contained in:
gtt1995 2021-09-08 06:18:19 -07:00 committed by Matt Morehouse
parent aecd75f095
commit a30dbbe924
5 changed files with 106 additions and 7 deletions

View File

@ -870,6 +870,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
exit(0);
}
Options.ForkCorpusGroups = Flags.fork_corpus_groups;
if (Flags.fork)
FuzzWithFork(F->GetMD().GetRand(), Options, Args, *Inputs, Flags.fork);

View File

@ -58,6 +58,10 @@ FUZZER_FLAG_INT(max_total_time, 0, "If positive, indicates the maximal total "
FUZZER_FLAG_INT(help, 0, "Print help.")
FUZZER_FLAG_INT(fork, 0, "Experimental mode where fuzzing happens "
"in a subprocess")
// Enables the size-grouped corpus strategy for fork mode. FuzzerDriver copies
// this flag into Options.ForkCorpusGroups.
FUZZER_FLAG_INT(fork_corpus_groups, 0, "For fork mode, enable the corpus-group "
  "strategy. The main corpus will be grouped according to size, "
  "and each sub-process will randomly select seeds from different "
  "groups as the sub-corpus.")
FUZZER_FLAG_INT(ignore_timeouts, 1, "Ignore timeouts in fork mode")
FUZZER_FLAG_INT(ignore_ooms, 1, "Ignore OOMs in fork mode")
FUZZER_FLAG_INT(ignore_crashes, 0, "Ignore crashes in fork mode")

View File

@ -95,9 +95,12 @@ struct GlobalEnv {
std::set<uint32_t> Features, Cov;
std::set<std::string> FilesWithDFT;
std::vector<std::string> Files;
std::vector<std::size_t> FilesSizes;
Random *Rand;
std::chrono::system_clock::time_point ProcessStartTime;
int Verbosity = 0;
int Group = 0;
int NumCorpuses = 8;
size_t NumTimeouts = 0;
size_t NumOOMs = 0;
@ -136,10 +139,24 @@ struct GlobalEnv {
if (size_t CorpusSubsetSize =
std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) {
auto Time1 = std::chrono::system_clock::now();
for (size_t i = 0; i < CorpusSubsetSize; i++) {
auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
Seeds += (Seeds.empty() ? "" : ",") + SF;
CollectDFT(SF);
if (Group) { // whether to group the corpus.
size_t AverageCorpusSize = Files.size() / NumCorpuses + 1;
size_t StartIndex = ((JobId - 1) % NumCorpuses) * AverageCorpusSize;
for (size_t i = 0; i < CorpusSubsetSize; i++) {
size_t RandNum = (*Rand)(AverageCorpusSize);
size_t Index = RandNum + StartIndex;
Index = Index < Files.size() ? Index
: Rand->SkewTowardsLast(Files.size());
auto &SF = Files[Index];
Seeds += (Seeds.empty() ? "" : ",") + SF;
CollectDFT(SF);
}
} else {
for (size_t i = 0; i < CorpusSubsetSize; i++) {
auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
Seeds += (Seeds.empty() ? "" : ",") + SF;
CollectDFT(SF);
}
}
auto Time2 = std::chrono::system_clock::now();
auto DftTimeInSeconds = duration_cast<seconds>(Time2 - Time1).count();
@ -222,7 +239,16 @@ struct GlobalEnv {
auto U = FileToVector(Path);
auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
WriteToFile(U, NewPath);
Files.push_back(NewPath);
if (Group) { // Insert the queue according to the size of the seed.
size_t UnitSize = U.size();
auto Idx =
std::upper_bound(FilesSizes.begin(), FilesSizes.end(), UnitSize) -
FilesSizes.begin();
FilesSizes.insert(FilesSizes.begin() + Idx, UnitSize);
Files.insert(Files.begin() + Idx, NewPath);
} else {
Files.push_back(NewPath);
}
}
Features.insert(NewFeatures.begin(), NewFeatures.end());
Cov.insert(NewCov.begin(), NewCov.end());
@ -231,10 +257,8 @@ struct GlobalEnv {
if (TPC.PcIsFuncEntry(TE))
PrintPC(" NEW_FUNC: %p %F %L\n", "",
TPC.GetNextInstructionPc(TE->PC));
}
void CollectDFT(const std::string &InputPath) {
if (DataFlowBinary.empty()) return;
if (!FilesWithDFT.insert(InputPath).second) return;
@ -297,6 +321,7 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
Env.Verbosity = Options.Verbosity;
Env.ProcessStartTime = std::chrono::system_clock::now();
Env.DataFlowBinary = Options.CollectDataFlow;
Env.Group = Options.ForkCorpusGroups;
std::vector<SizedFile> SeedFiles;
for (auto &Dir : CorpusDirs)
@ -327,6 +352,12 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
Env.Cov.insert(NewFeatures.begin(), NewFeatures.end());
RemoveFile(CFPath);
}
if (Env.Group) {
for (auto &path : Env.Files)
Env.FilesSizes.push_back(FileSize(path));
}
Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
Env.Files.size(), Env.TempDir.c_str());
@ -341,6 +372,8 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
WriteToFile(Unit({1}), Env.StopFile());
};
size_t MergeCycle = 20;
size_t JobExecuted = 0;
size_t JobId = 1;
std::vector<std::thread> Threads;
for (int t = 0; t < NumJobs; t++) {
@ -362,6 +395,45 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
Env.RunOneMergeJob(Job.get());
// Merge the corpus.
JobExecuted++;
if (Env.Group && JobExecuted >= MergeCycle) {
std::vector<SizedFile> CurrentSeedFiles;
for (auto &Dir : CorpusDirs)
GetSizedFilesFromDir(Dir, &CurrentSeedFiles);
std::sort(CurrentSeedFiles.begin(), CurrentSeedFiles.end());
auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
std::set<uint32_t> TmpNewFeatures, TmpNewCov;
std::set<uint32_t> TmpFeatures, TmpCov;
Env.Files.clear();
Env.FilesSizes.clear();
CrashResistantMerge(Env.Args, {}, CurrentSeedFiles, &Env.Files,
TmpFeatures, &TmpNewFeatures, TmpCov, &TmpNewCov,
CFPath, false);
for (auto &path : Env.Files)
Env.FilesSizes.push_back(FileSize(path));
RemoveFile(CFPath);
JobExecuted = 0;
MergeCycle += 5;
}
// The number of seeds in the corpus gradually increases, so the number of
// groups is adjusted dynamically to keep each group at about three times
// the number of seeds selected each time.
if (Env.Files.size() < 2000)
Env.NumCorpuses = 12;
else if (Env.Files.size() < 6000)
Env.NumCorpuses = 20;
else if (Env.Files.size() < 12000)
Env.NumCorpuses = 32;
else if (Env.Files.size() < 16000)
Env.NumCorpuses = 40;
else if (Env.Files.size() < 24000)
Env.NumCorpuses = 60;
else
Env.NumCorpuses = 80;
// Continue if our crash is one of the ignored ones.
if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode)
Env.NumTimeouts++;

View File

@ -47,6 +47,7 @@ struct FuzzingOptions {
int ReportSlowUnits = 10;
bool OnlyASCII = false;
bool Entropic = true;
bool ForkCorpusGroups = false;
size_t EntropicFeatureFrequencyThreshold = 0xFF;
size_t EntropicNumberOfRarestFeatures = 100;
bool EntropicScalePerExecTime = false;

View File

@ -0,0 +1,21 @@
# UNSUPPORTED: darwin, freebsd, aarch64
BINGO: BINGO
RUN: %cpp_compiler %S/SimpleTest.cpp -o %t-SimpleTest
RUN: not %run %t-SimpleTest -fork=1 -fork_corpus_groups=1 2>&1 | FileCheck %s --check-prefix=BINGO
TIMEOUT: ERROR: libFuzzer: timeout
RUN: %cpp_compiler %S/TimeoutTest.cpp -o %t-TimeoutTest
RUN: not %run %t-TimeoutTest -fork=1 -fork_corpus_groups=1 -timeout=1 -ignore_timeouts=0 2>&1 | FileCheck %s --check-prefix=TIMEOUT
OOM: ERROR: libFuzzer: out-of-memory
RUN: %cpp_compiler %S/OutOfMemoryTest.cpp -o %t-OutOfMemoryTest
RUN: not %run %t-OutOfMemoryTest -fork=1 -fork_corpus_groups=1 -ignore_ooms=0 -rss_limit_mb=128 2>&1 | FileCheck %s --check-prefix=OOM
# access-violation is the error thrown on Windows. Address will be smaller on i386.
CRASH: {{SEGV|access-violation}} on unknown address 0x00000000
RUN: %cpp_compiler %S/ShallowOOMDeepCrash.cpp -o %t-ShallowOOMDeepCrash
RUN: not %run %t-ShallowOOMDeepCrash -fork=1 -fork_corpus_groups=1 -rss_limit_mb=128 2>&1 | FileCheck %s --check-prefix=CRASH
MAX_TOTAL_TIME: INFO: fuzzed for {{.*}} seconds, wrapping up soon
MAX_TOTAL_TIME: INFO: exiting: {{.*}} time:
RUN: not %run %t-ShallowOOMDeepCrash -fork=1 -fork_corpus_groups=1 -rss_limit_mb=128 -ignore_crashes=1 -max_total_time=10 2>&1 | FileCheck %s --check-prefix=MAX_TOTAL_TIME