forked from OSchip/llvm-project
Redistribute energy for Corpus
The initial corpus allocation in fork mode has certain defects, so I designed a new initial corpus allocation strategy based on size grouping. This method gives more energy to the small seeds in the corpus and increases the throughput of the test. Fuzzbench data (the "glibfuzzer" entry is the run with -fork_corpus_groups=1): https://www.fuzzbench.com/reports/experimental/2021-08-05-parallel/index.html Reviewed By: morehouse Differential Revision: https://reviews.llvm.org/D105084
This commit is contained in:
parent
aecd75f095
commit
a30dbbe924
|
@ -870,6 +870,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
|
|||
exit(0);
|
||||
}
|
||||
|
||||
Options.ForkCorpusGroups = Flags.fork_corpus_groups;
|
||||
if (Flags.fork)
|
||||
FuzzWithFork(F->GetMD().GetRand(), Options, Args, *Inputs, Flags.fork);
|
||||
|
||||
|
|
|
@ -58,6 +58,10 @@ FUZZER_FLAG_INT(max_total_time, 0, "If positive, indicates the maximal total "
|
|||
FUZZER_FLAG_INT(help, 0, "Print help.")
|
||||
FUZZER_FLAG_INT(fork, 0, "Experimental mode where fuzzing happens "
|
||||
"in a subprocess")
|
||||
FUZZER_FLAG_INT(fork_corpus_groups, 0, "For fork mode, enable the corpus-group "
|
||||
"strategy, The main corpus will be grouped according to size, "
|
||||
"and each sub-process will randomly select seeds from different "
|
||||
"groups as the sub-corpus.")
|
||||
FUZZER_FLAG_INT(ignore_timeouts, 1, "Ignore timeouts in fork mode")
|
||||
FUZZER_FLAG_INT(ignore_ooms, 1, "Ignore OOMs in fork mode")
|
||||
FUZZER_FLAG_INT(ignore_crashes, 0, "Ignore crashes in fork mode")
|
||||
|
|
|
@ -95,9 +95,12 @@ struct GlobalEnv {
|
|||
std::set<uint32_t> Features, Cov;
|
||||
std::set<std::string> FilesWithDFT;
|
||||
std::vector<std::string> Files;
|
||||
std::vector<std::size_t> FilesSizes;
|
||||
Random *Rand;
|
||||
std::chrono::system_clock::time_point ProcessStartTime;
|
||||
int Verbosity = 0;
|
||||
int Group = 0;
|
||||
int NumCorpuses = 8;
|
||||
|
||||
size_t NumTimeouts = 0;
|
||||
size_t NumOOMs = 0;
|
||||
|
@ -136,10 +139,24 @@ struct GlobalEnv {
|
|||
if (size_t CorpusSubsetSize =
|
||||
std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) {
|
||||
auto Time1 = std::chrono::system_clock::now();
|
||||
for (size_t i = 0; i < CorpusSubsetSize; i++) {
|
||||
auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
|
||||
Seeds += (Seeds.empty() ? "" : ",") + SF;
|
||||
CollectDFT(SF);
|
||||
if (Group) { // whether to group the corpus.
|
||||
size_t AverageCorpusSize = Files.size() / NumCorpuses + 1;
|
||||
size_t StartIndex = ((JobId - 1) % NumCorpuses) * AverageCorpusSize;
|
||||
for (size_t i = 0; i < CorpusSubsetSize; i++) {
|
||||
size_t RandNum = (*Rand)(AverageCorpusSize);
|
||||
size_t Index = RandNum + StartIndex;
|
||||
Index = Index < Files.size() ? Index
|
||||
: Rand->SkewTowardsLast(Files.size());
|
||||
auto &SF = Files[Index];
|
||||
Seeds += (Seeds.empty() ? "" : ",") + SF;
|
||||
CollectDFT(SF);
|
||||
}
|
||||
} else {
|
||||
for (size_t i = 0; i < CorpusSubsetSize; i++) {
|
||||
auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
|
||||
Seeds += (Seeds.empty() ? "" : ",") + SF;
|
||||
CollectDFT(SF);
|
||||
}
|
||||
}
|
||||
auto Time2 = std::chrono::system_clock::now();
|
||||
auto DftTimeInSeconds = duration_cast<seconds>(Time2 - Time1).count();
|
||||
|
@ -222,7 +239,16 @@ struct GlobalEnv {
|
|||
auto U = FileToVector(Path);
|
||||
auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
|
||||
WriteToFile(U, NewPath);
|
||||
Files.push_back(NewPath);
|
||||
if (Group) { // Insert the queue according to the size of the seed.
|
||||
size_t UnitSize = U.size();
|
||||
auto Idx =
|
||||
std::upper_bound(FilesSizes.begin(), FilesSizes.end(), UnitSize) -
|
||||
FilesSizes.begin();
|
||||
FilesSizes.insert(FilesSizes.begin() + Idx, UnitSize);
|
||||
Files.insert(Files.begin() + Idx, NewPath);
|
||||
} else {
|
||||
Files.push_back(NewPath);
|
||||
}
|
||||
}
|
||||
Features.insert(NewFeatures.begin(), NewFeatures.end());
|
||||
Cov.insert(NewCov.begin(), NewCov.end());
|
||||
|
@ -231,10 +257,8 @@ struct GlobalEnv {
|
|||
if (TPC.PcIsFuncEntry(TE))
|
||||
PrintPC(" NEW_FUNC: %p %F %L\n", "",
|
||||
TPC.GetNextInstructionPc(TE->PC));
|
||||
|
||||
}
|
||||
|
||||
|
||||
void CollectDFT(const std::string &InputPath) {
|
||||
if (DataFlowBinary.empty()) return;
|
||||
if (!FilesWithDFT.insert(InputPath).second) return;
|
||||
|
@ -297,6 +321,7 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
|
|||
Env.Verbosity = Options.Verbosity;
|
||||
Env.ProcessStartTime = std::chrono::system_clock::now();
|
||||
Env.DataFlowBinary = Options.CollectDataFlow;
|
||||
Env.Group = Options.ForkCorpusGroups;
|
||||
|
||||
std::vector<SizedFile> SeedFiles;
|
||||
for (auto &Dir : CorpusDirs)
|
||||
|
@ -327,6 +352,12 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
|
|||
Env.Cov.insert(NewFeatures.begin(), NewFeatures.end());
|
||||
RemoveFile(CFPath);
|
||||
}
|
||||
|
||||
if (Env.Group) {
|
||||
for (auto &path : Env.Files)
|
||||
Env.FilesSizes.push_back(FileSize(path));
|
||||
}
|
||||
|
||||
Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
|
||||
Env.Files.size(), Env.TempDir.c_str());
|
||||
|
||||
|
@ -341,6 +372,8 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
|
|||
WriteToFile(Unit({1}), Env.StopFile());
|
||||
};
|
||||
|
||||
size_t MergeCycle = 20;
|
||||
size_t JobExecuted = 0;
|
||||
size_t JobId = 1;
|
||||
std::vector<std::thread> Threads;
|
||||
for (int t = 0; t < NumJobs; t++) {
|
||||
|
@ -362,6 +395,45 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
|
|||
|
||||
Env.RunOneMergeJob(Job.get());
|
||||
|
||||
// Periodically re-merge the corpus (only when corpus grouping is enabled).
|
||||
JobExecuted++;
|
||||
if (Env.Group && JobExecuted >= MergeCycle) {
|
||||
std::vector<SizedFile> CurrentSeedFiles;
|
||||
for (auto &Dir : CorpusDirs)
|
||||
GetSizedFilesFromDir(Dir, &CurrentSeedFiles);
|
||||
std::sort(CurrentSeedFiles.begin(), CurrentSeedFiles.end());
|
||||
|
||||
auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
|
||||
std::set<uint32_t> TmpNewFeatures, TmpNewCov;
|
||||
std::set<uint32_t> TmpFeatures, TmpCov;
|
||||
Env.Files.clear();
|
||||
Env.FilesSizes.clear();
|
||||
CrashResistantMerge(Env.Args, {}, CurrentSeedFiles, &Env.Files,
|
||||
TmpFeatures, &TmpNewFeatures, TmpCov, &TmpNewCov,
|
||||
CFPath, false);
|
||||
for (auto &path : Env.Files)
|
||||
Env.FilesSizes.push_back(FileSize(path));
|
||||
RemoveFile(CFPath);
|
||||
JobExecuted = 0;
|
||||
MergeCycle += 5;
|
||||
}
|
||||
|
||||
// Since the number of corpus seeds will gradually increase, in order to
|
||||
// control the number in each group to be about three times the number of
|
||||
// seeds selected each time, the number of groups is dynamically adjusted.
|
||||
if (Env.Files.size() < 2000)
|
||||
Env.NumCorpuses = 12;
|
||||
else if (Env.Files.size() < 6000)
|
||||
Env.NumCorpuses = 20;
|
||||
else if (Env.Files.size() < 12000)
|
||||
Env.NumCorpuses = 32;
|
||||
else if (Env.Files.size() < 16000)
|
||||
Env.NumCorpuses = 40;
|
||||
else if (Env.Files.size() < 24000)
|
||||
Env.NumCorpuses = 60;
|
||||
else
|
||||
Env.NumCorpuses = 80;
|
||||
|
||||
// Continue if our crash is one of the ignored ones.
|
||||
if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode)
|
||||
Env.NumTimeouts++;
|
||||
|
|
|
@ -47,6 +47,7 @@ struct FuzzingOptions {
|
|||
int ReportSlowUnits = 10;
|
||||
bool OnlyASCII = false;
|
||||
bool Entropic = true;
|
||||
bool ForkCorpusGroups = false;
|
||||
size_t EntropicFeatureFrequencyThreshold = 0xFF;
|
||||
size_t EntropicNumberOfRarestFeatures = 100;
|
||||
bool EntropicScalePerExecTime = false;
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
# UNSUPPORTED: darwin, freebsd, aarch64
|
||||
BINGO: BINGO
|
||||
RUN: %cpp_compiler %S/SimpleTest.cpp -o %t-SimpleTest
|
||||
RUN: not %run %t-SimpleTest -fork=1 -fork_corpus_groups=1 2>&1 | FileCheck %s --check-prefix=BINGO
|
||||
|
||||
TIMEOUT: ERROR: libFuzzer: timeout
|
||||
RUN: %cpp_compiler %S/TimeoutTest.cpp -o %t-TimeoutTest
|
||||
RUN: not %run %t-TimeoutTest -fork=1 -fork_corpus_groups=1 -timeout=1 -ignore_timeouts=0 2>&1 | FileCheck %s --check-prefix=TIMEOUT
|
||||
|
||||
OOM: ERROR: libFuzzer: out-of-memory
|
||||
RUN: %cpp_compiler %S/OutOfMemoryTest.cpp -o %t-OutOfMemoryTest
|
||||
RUN: not %run %t-OutOfMemoryTest -fork=1 -fork_corpus_groups=1 -ignore_ooms=0 -rss_limit_mb=128 2>&1 | FileCheck %s --check-prefix=OOM
|
||||
|
||||
# access-violation is the error thrown on Windows. Address will be smaller on i386.
|
||||
CRASH: {{SEGV|access-violation}} on unknown address 0x00000000
|
||||
RUN: %cpp_compiler %S/ShallowOOMDeepCrash.cpp -o %t-ShallowOOMDeepCrash
|
||||
RUN: not %run %t-ShallowOOMDeepCrash -fork=1 -fork_corpus_groups=1 -rss_limit_mb=128 2>&1 | FileCheck %s --check-prefix=CRASH
|
||||
|
||||
MAX_TOTAL_TIME: INFO: fuzzed for {{.*}} seconds, wrapping up soon
|
||||
MAX_TOTAL_TIME: INFO: exiting: {{.*}} time:
|
||||
RUN: not %run %t-ShallowOOMDeepCrash -fork=1 -fork_corpus_groups=1 -rss_limit_mb=128 -ignore_crashes=1 -max_total_time=10 2>&1 | FileCheck %s --check-prefix=MAX_TOTAL_TIME
|
Loading…
Reference in New Issue