forked from OSchip/llvm-project
[libFuzzer] Scale energy assigned to each input based on input execution time.
This patch scales the energy computed by the Entropic schedule based on the execution time of each input. The input execution time is compared with the average execution time of inputs in the corpus, and, based on the amount by which they differ, the energy is scaled from 0.1x (for inputs executing slowly) to 3x (for inputs executing quickly). Note that the exact scaling criteria and formula are borrowed from AFL. On FuzzBench, this gives a sizeable throughput increase, which in turn leads to more coverage on several benchmarks. For details, see the following report. https://storage.googleapis.com/fuzzer-test-suite-public/exectime-report/index.html Differential Revision: https://reviews.llvm.org/D86092
This commit is contained in:
parent
952dc7f288
commit
5cda4dc7b4
|
@ -18,6 +18,7 @@
|
|||
#include "FuzzerSHA1.h"
|
||||
#include "FuzzerTracePC.h"
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <numeric>
|
||||
#include <random>
|
||||
#include <unordered_set>
|
||||
|
@ -26,6 +27,7 @@ namespace fuzzer {
|
|||
|
||||
struct InputInfo {
|
||||
Unit U; // The actual input data.
|
||||
std::chrono::microseconds TimeOfUnit;
|
||||
uint8_t Sha1[kSHA1NumBytes]; // Checksum.
|
||||
// Number of features that this input has and no smaller input has.
|
||||
size_t NumFeatures = 0;
|
||||
|
@ -62,11 +64,15 @@ struct InputInfo {
|
|||
}
|
||||
|
||||
// Assign more energy to a high-entropy seed, i.e., that reveals more
|
||||
// information about the globally rare features in the neighborhood
|
||||
// of the seed. Since we do not know the entropy of a seed that has
|
||||
// never been executed we assign fresh seeds maximum entropy and
|
||||
// let II->Energy approach the true entropy from above.
|
||||
void UpdateEnergy(size_t GlobalNumberOfFeatures) {
|
||||
// information about the globally rare features in the neighborhood of the
|
||||
// seed. Since we do not know the entropy of a seed that has never been
|
||||
// executed we assign fresh seeds maximum entropy and let II->Energy approach
|
||||
// the true entropy from above. If ScalePerExecTime is true, the computed
|
||||
// entropy is scaled based on how fast this input executes compared to the
|
||||
// average execution time of inputs. The faster an input executes, the more
|
||||
// energy gets assigned to the input.
|
||||
void UpdateEnergy(size_t GlobalNumberOfFeatures, bool ScalePerExecTime,
|
||||
std::chrono::microseconds AverageUnitExecutionTime) {
|
||||
Energy = 0.0;
|
||||
SumIncidence = 0;
|
||||
|
||||
|
@ -89,6 +95,27 @@ struct InputInfo {
|
|||
// Normalize.
|
||||
if (SumIncidence != 0)
|
||||
Energy = (Energy / SumIncidence) + logl(SumIncidence);
|
||||
|
||||
if (ScalePerExecTime) {
|
||||
// Scaling to favor inputs with lower execution time.
|
||||
uint32_t PerfScore = 100;
|
||||
if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 10)
|
||||
PerfScore = 10;
|
||||
else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 4)
|
||||
PerfScore = 25;
|
||||
else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 2)
|
||||
PerfScore = 50;
|
||||
else if (TimeOfUnit.count() * 3 > AverageUnitExecutionTime.count() * 4)
|
||||
PerfScore = 75;
|
||||
else if (TimeOfUnit.count() * 4 < AverageUnitExecutionTime.count())
|
||||
PerfScore = 300;
|
||||
else if (TimeOfUnit.count() * 3 < AverageUnitExecutionTime.count())
|
||||
PerfScore = 200;
|
||||
else if (TimeOfUnit.count() * 2 < AverageUnitExecutionTime.count())
|
||||
PerfScore = 150;
|
||||
|
||||
Energy *= PerfScore;
|
||||
}
|
||||
}
|
||||
|
||||
// Increment the frequency of the feature Idx.
|
||||
|
@ -121,6 +148,7 @@ struct EntropicOptions {
|
|||
bool Enabled;
|
||||
size_t NumberOfRarestFeatures;
|
||||
size_t FeatureFrequencyThreshold;
|
||||
bool ScalePerExecTime;
|
||||
};
|
||||
|
||||
class InputCorpus {
|
||||
|
@ -179,6 +207,7 @@ public:
|
|||
const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
|
||||
InputInfo *AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile,
|
||||
bool HasFocusFunction, bool NeverReduce,
|
||||
std::chrono::microseconds TimeOfUnit,
|
||||
const Vector<uint32_t> &FeatureSet,
|
||||
const DataFlowTrace &DFT, const InputInfo *BaseII) {
|
||||
assert(!U.empty());
|
||||
|
@ -189,6 +218,7 @@ public:
|
|||
II.U = U;
|
||||
II.NumFeatures = NumFeatures;
|
||||
II.NeverReduce = NeverReduce;
|
||||
II.TimeOfUnit = TimeOfUnit;
|
||||
II.MayDeleteFile = MayDeleteFile;
|
||||
II.UniqFeatureSet = FeatureSet;
|
||||
II.HasFocusFunction = HasFocusFunction;
|
||||
|
@ -471,12 +501,19 @@ private:
|
|||
Weights.resize(N);
|
||||
std::iota(Intervals.begin(), Intervals.end(), 0);
|
||||
|
||||
std::chrono::microseconds AverageUnitExecutionTime(0);
|
||||
for (auto II : Inputs) {
|
||||
AverageUnitExecutionTime += II->TimeOfUnit;
|
||||
}
|
||||
AverageUnitExecutionTime /= N;
|
||||
|
||||
bool VanillaSchedule = true;
|
||||
if (Entropic.Enabled) {
|
||||
for (auto II : Inputs) {
|
||||
if (II->NeedsEnergyUpdate && II->Energy != 0.0) {
|
||||
II->NeedsEnergyUpdate = false;
|
||||
II->UpdateEnergy(RareFeatures.size());
|
||||
II->UpdateEnergy(RareFeatures.size(), Entropic.ScalePerExecTime,
|
||||
AverageUnitExecutionTime);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -764,6 +764,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
|
|||
(size_t)Flags.entropic_feature_frequency_threshold;
|
||||
Options.EntropicNumberOfRarestFeatures =
|
||||
(size_t)Flags.entropic_number_of_rarest_features;
|
||||
Options.EntropicScalePerExecTime = Flags.entropic_scale_per_exec_time;
|
||||
if (Options.Entropic) {
|
||||
if (!Options.FocusFunction.empty()) {
|
||||
Printf("ERROR: The parameters `--entropic` and `--focus_function` cannot "
|
||||
|
@ -779,6 +780,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
|
|||
Entropic.FeatureFrequencyThreshold =
|
||||
Options.EntropicFeatureFrequencyThreshold;
|
||||
Entropic.NumberOfRarestFeatures = Options.EntropicNumberOfRarestFeatures;
|
||||
Entropic.ScalePerExecTime = Options.EntropicScalePerExecTime;
|
||||
|
||||
unsigned Seed = Flags.seed;
|
||||
// Initialize Seed.
|
||||
|
|
|
@ -175,6 +175,11 @@ FUZZER_FLAG_INT(entropic_number_of_rarest_features, 100, "Experimental. If "
|
|||
"entropic is enabled, we keep track of the frequencies only for the "
|
||||
"Top-X least abundant features (union features that are considered as "
|
||||
"rare).")
|
||||
FUZZER_FLAG_INT(entropic_scale_per_exec_time, 0, "Experimental. If 1, "
|
||||
"the Entropic power schedule gets scaled based on the input execution "
|
||||
"time. Inputs with lower execution time get scheduled more (up to 30x). "
|
||||
"Note that, if 1, fuzzer stops from being deterministic even if a "
|
||||
"non-zero random seed is given.")
|
||||
|
||||
FUZZER_FLAG_INT(analyze_dict, 0, "Experimental")
|
||||
FUZZER_DEPRECATED_FLAG(use_clang_coverage)
|
||||
|
|
|
@ -470,6 +470,7 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
|
|||
return false;
|
||||
|
||||
ExecuteCallback(Data, Size);
|
||||
auto TimeOfUnit = duration_cast<microseconds>(UnitStopTime - UnitStartTime);
|
||||
|
||||
UniqFeatureSetTmp.clear();
|
||||
size_t FoundUniqFeaturesOfII = 0;
|
||||
|
@ -493,7 +494,7 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
|
|||
auto NewII =
|
||||
Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile,
|
||||
TPC.ObservedFocusFunction(), ForceAddToCorpus,
|
||||
UniqFeatureSetTmp, DFT, II);
|
||||
TimeOfUnit, UniqFeatureSetTmp, DFT, II);
|
||||
WriteFeatureSetToFile(Options.FeaturesDir, Sha1ToString(NewII->Sha1),
|
||||
NewII->UniqFeatureSet);
|
||||
return true;
|
||||
|
|
|
@ -49,6 +49,7 @@ struct FuzzingOptions {
|
|||
bool Entropic = false;
|
||||
size_t EntropicFeatureFrequencyThreshold = 0xFF;
|
||||
size_t EntropicNumberOfRarestFeatures = 100;
|
||||
bool EntropicScalePerExecTime = false;
|
||||
std::string OutputCorpus;
|
||||
std::string ArtifactPrefix = "./";
|
||||
std::string ExactArtifactPath;
|
||||
|
|
|
@ -599,7 +599,9 @@ TEST(Corpus, Distribution) {
|
|||
for (size_t i = 0; i < N; i++)
|
||||
C->AddToCorpus(Unit{static_cast<uint8_t>(i)}, /*NumFeatures*/ 1,
|
||||
/*MayDeleteFile*/ false, /*HasFocusFunction*/ false,
|
||||
/*ForceAddToCorpus*/ false, /*FeatureSet*/ {}, DFT,
|
||||
/*ForceAddToCorpus*/ false,
|
||||
/*TimeOfUnit*/ std::chrono::microseconds(0),
|
||||
/*FeatureSet*/ {}, DFT,
|
||||
/*BaseII*/ nullptr);
|
||||
|
||||
Vector<size_t> Hist(N);
|
||||
|
@ -1101,17 +1103,17 @@ TEST(Entropic, ComputeEnergy) {
|
|||
Vector<std::pair<uint32_t, uint16_t>> FeatureFreqs = {{1, 3}, {2, 3}, {3, 3}};
|
||||
II->FeatureFreqs = FeatureFreqs;
|
||||
II->NumExecutedMutations = 0;
|
||||
II->UpdateEnergy(4);
|
||||
II->UpdateEnergy(4, false, std::chrono::microseconds(0));
|
||||
EXPECT_LT(SubAndSquare(II->Energy, 1.450805), Precision);
|
||||
|
||||
II->NumExecutedMutations = 9;
|
||||
II->UpdateEnergy(5);
|
||||
II->UpdateEnergy(5, false, std::chrono::microseconds(0));
|
||||
EXPECT_LT(SubAndSquare(II->Energy, 1.525496), Precision);
|
||||
|
||||
II->FeatureFreqs[0].second++;
|
||||
II->FeatureFreqs.push_back(std::pair<uint32_t, uint16_t>(42, 6));
|
||||
II->NumExecutedMutations = 20;
|
||||
II->UpdateEnergy(10);
|
||||
II->UpdateEnergy(10, false, std::chrono::microseconds(0));
|
||||
EXPECT_LT(SubAndSquare(II->Energy, 1.792831), Precision);
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
// Tests whether scaling the Entropic scheduling weight based on input execution
|
||||
// time is effective or not. Inputs of size 10 will take at least 100
|
||||
// microseconds more than any input of size 1-9. The input of size 2 in the
|
||||
// corpus should be favored by the exec-time-scaled Entropic scheduling policy
|
||||
// than the input of size 10 in the corpus, eventually finding the crashing
|
||||
// input {0xab, 0xcd} with less executions.
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
#include <thread>
|
||||
|
||||
// Write target for volatile stores; keeps the slow branch from being
// optimized away.
static volatile int Sink;
// Null pointer dereferenced on purpose to produce the expected crash.
static volatile int *Nil = nullptr;

// Fuzz target used to exercise exec-time-scaled Entropic scheduling:
// size-10 inputs are artificially slow, and the crashing input
// {0xab, 0xcd} has size 2, so the scaled schedule should reach the
// crash with fewer executions.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
  // Ignore oversized inputs to keep the test quick.
  if (Size > 10)
    return 0;

  if (Size == 10) {
    // Penalize size-10 inputs with an extra ~100 microsecond delay so
    // they execute much slower than the crashing input below.
    const size_t DelayUSec = 100;
    std::this_thread::sleep_for(std::chrono::microseconds(DelayUSec));
    Sink = 0;
  }

  // The crashing input: exactly two bytes, {0xab, 0xcd}.
  if (Size == 2 && Data[0] == 0xab && Data[1] == 0xcd)
    *Nil = 42; // crash.

  return 0;
}
|
|
@ -0,0 +1,8 @@
|
|||
REQUIRES: linux, x86_64
|
||||
RUN: %cpp_compiler %S/EntropicScalePerExecTimeTest.cpp -o %t-EntropicScalePerExecTimeTest
|
||||
RUN: not %run %t-EntropicScalePerExecTimeTest -entropic=1 -entropic_scale_per_exec_time=1 -seed=1 -runs=100000 -max_len=10
|
||||
|
||||
# The following test is added as a comment here for reference, which should
|
||||
# take more runs than with -entropic_scale_per_exec_time=1 to find the crash.
|
||||
# (it takes 126,633 runs)
|
||||
# RUN: not %run %t-EntropicScalePerExecTimeTest -entropic=1 -seed=1 -runs=200000 -max_len=10
|
Loading…
Reference in New Issue