[libFuzzer] Scale energy assigned to each input based on input execution time.

This patch scales the energy computed by the Entropic schedule based on the
execution time of each input. The input execution time is compared with the
average execution time of inputs in the corpus, and, based on the amount by
which they differ, the energy is scaled from 0.1x (for inputs executing slowly)
to 3x (for inputs executing fast). Note that the exact scaling criteria and
formula are borrowed from AFL.

On FuzzBench, this gives a sizeable throughput increase, which in turn leads to
more coverage on several benchmarks. For details, see the following report.

https://storage.googleapis.com/fuzzer-test-suite-public/exectime-report/index.html

Differential Revision: https://reviews.llvm.org/D86092
This commit is contained in:
Dokyung Song 2020-08-17 16:59:59 +00:00
parent 952dc7f288
commit 5cda4dc7b4
8 changed files with 100 additions and 11 deletions

View File

@ -18,6 +18,7 @@
#include "FuzzerSHA1.h"
#include "FuzzerTracePC.h"
#include <algorithm>
#include <chrono>
#include <numeric>
#include <random>
#include <unordered_set>
@ -26,6 +27,7 @@ namespace fuzzer {
struct InputInfo {
Unit U; // The actual input data.
std::chrono::microseconds TimeOfUnit;
uint8_t Sha1[kSHA1NumBytes]; // Checksum.
// Number of features that this input has and no smaller input has.
size_t NumFeatures = 0;
@ -62,11 +64,15 @@ struct InputInfo {
}
// Assign more energy to a high-entropy seed, i.e., that reveals more
// information about the globally rare features in the neighborhood
// of the seed. Since we do not know the entropy of a seed that has
// never been executed we assign fresh seeds maximum entropy and
// let II->Energy approach the true entropy from above.
void UpdateEnergy(size_t GlobalNumberOfFeatures) {
// information about the globally rare features in the neighborhood of the
// seed. Since we do not know the entropy of a seed that has never been
// executed we assign fresh seeds maximum entropy and let II->Energy approach
// the true entropy from above. If ScalePerExecTime is true, the computed
// entropy is scaled based on how fast this input executes compared to the
// average execution time of inputs. The faster an input executes, the more
// energy gets assigned to the input.
void UpdateEnergy(size_t GlobalNumberOfFeatures, bool ScalePerExecTime,
std::chrono::microseconds AverageUnitExecutionTime) {
Energy = 0.0;
SumIncidence = 0;
@ -89,6 +95,27 @@ struct InputInfo {
// Normalize.
if (SumIncidence != 0)
Energy = (Energy / SumIncidence) + logl(SumIncidence);
if (ScalePerExecTime) {
// Scaling to favor inputs with lower execution time.
uint32_t PerfScore = 100;
if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 10)
PerfScore = 10;
else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 4)
PerfScore = 25;
else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 2)
PerfScore = 50;
else if (TimeOfUnit.count() * 3 > AverageUnitExecutionTime.count() * 4)
PerfScore = 75;
else if (TimeOfUnit.count() * 4 < AverageUnitExecutionTime.count())
PerfScore = 300;
else if (TimeOfUnit.count() * 3 < AverageUnitExecutionTime.count())
PerfScore = 200;
else if (TimeOfUnit.count() * 2 < AverageUnitExecutionTime.count())
PerfScore = 150;
Energy *= PerfScore;
}
}
// Increment the frequency of the feature Idx.
@ -121,6 +148,7 @@ struct EntropicOptions {
bool Enabled;
size_t NumberOfRarestFeatures;
size_t FeatureFrequencyThreshold;
bool ScalePerExecTime;
};
class InputCorpus {
@ -179,6 +207,7 @@ public:
const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
InputInfo *AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile,
bool HasFocusFunction, bool NeverReduce,
std::chrono::microseconds TimeOfUnit,
const Vector<uint32_t> &FeatureSet,
const DataFlowTrace &DFT, const InputInfo *BaseII) {
assert(!U.empty());
@ -189,6 +218,7 @@ public:
II.U = U;
II.NumFeatures = NumFeatures;
II.NeverReduce = NeverReduce;
II.TimeOfUnit = TimeOfUnit;
II.MayDeleteFile = MayDeleteFile;
II.UniqFeatureSet = FeatureSet;
II.HasFocusFunction = HasFocusFunction;
@ -471,12 +501,19 @@ private:
Weights.resize(N);
std::iota(Intervals.begin(), Intervals.end(), 0);
std::chrono::microseconds AverageUnitExecutionTime(0);
for (auto II : Inputs) {
AverageUnitExecutionTime += II->TimeOfUnit;
}
AverageUnitExecutionTime /= N;
bool VanillaSchedule = true;
if (Entropic.Enabled) {
for (auto II : Inputs) {
if (II->NeedsEnergyUpdate && II->Energy != 0.0) {
II->NeedsEnergyUpdate = false;
II->UpdateEnergy(RareFeatures.size());
II->UpdateEnergy(RareFeatures.size(), Entropic.ScalePerExecTime,
AverageUnitExecutionTime);
}
}

View File

@ -764,6 +764,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
(size_t)Flags.entropic_feature_frequency_threshold;
Options.EntropicNumberOfRarestFeatures =
(size_t)Flags.entropic_number_of_rarest_features;
Options.EntropicScalePerExecTime = Flags.entropic_scale_per_exec_time;
if (Options.Entropic) {
if (!Options.FocusFunction.empty()) {
Printf("ERROR: The parameters `--entropic` and `--focus_function` cannot "
@ -779,6 +780,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
Entropic.FeatureFrequencyThreshold =
Options.EntropicFeatureFrequencyThreshold;
Entropic.NumberOfRarestFeatures = Options.EntropicNumberOfRarestFeatures;
Entropic.ScalePerExecTime = Options.EntropicScalePerExecTime;
unsigned Seed = Flags.seed;
// Initialize Seed.

View File

@ -175,6 +175,11 @@ FUZZER_FLAG_INT(entropic_number_of_rarest_features, 100, "Experimental. If "
"entropic is enabled, we keep track of the frequencies only for the "
"Top-X least abundant features (union features that are considered as "
"rare).")
// Opt-in switch (default 0) for scaling Entropic energy by input execution
// time. FuzzerDriver copies this flag into Options.EntropicScalePerExecTime and
// from there into the EntropicOptions handed to the corpus. The "up to 30x" in
// the help text is the ratio between the largest (300) and smallest (10)
// PerfScore multipliers applied in InputInfo::UpdateEnergy.
FUZZER_FLAG_INT(entropic_scale_per_exec_time, 0, "Experimental. If 1, "
"the Entropic power schedule gets scaled based on the input execution "
"time. Inputs with lower execution time get scheduled more (up to 30x). "
"Note that, if 1, fuzzer stops from being deterministic even if a "
"non-zero random seed is given.")
FUZZER_FLAG_INT(analyze_dict, 0, "Experimental")
FUZZER_DEPRECATED_FLAG(use_clang_coverage)

View File

@ -470,6 +470,7 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
return false;
ExecuteCallback(Data, Size);
auto TimeOfUnit = duration_cast<microseconds>(UnitStopTime - UnitStartTime);
UniqFeatureSetTmp.clear();
size_t FoundUniqFeaturesOfII = 0;
@ -493,7 +494,7 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
auto NewII =
Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile,
TPC.ObservedFocusFunction(), ForceAddToCorpus,
UniqFeatureSetTmp, DFT, II);
TimeOfUnit, UniqFeatureSetTmp, DFT, II);
WriteFeatureSetToFile(Options.FeaturesDir, Sha1ToString(NewII->Sha1),
NewII->UniqFeatureSet);
return true;

View File

@ -49,6 +49,7 @@ struct FuzzingOptions {
bool Entropic = false;
size_t EntropicFeatureFrequencyThreshold = 0xFF;
size_t EntropicNumberOfRarestFeatures = 100;
bool EntropicScalePerExecTime = false;
std::string OutputCorpus;
std::string ArtifactPrefix = "./";
std::string ExactArtifactPath;

View File

@ -599,7 +599,9 @@ TEST(Corpus, Distribution) {
for (size_t i = 0; i < N; i++)
C->AddToCorpus(Unit{static_cast<uint8_t>(i)}, /*NumFeatures*/ 1,
/*MayDeleteFile*/ false, /*HasFocusFunction*/ false,
/*ForceAddToCorpus*/ false, /*FeatureSet*/ {}, DFT,
/*ForceAddToCorpus*/ false,
/*TimeOfUnit*/ std::chrono::microseconds(0),
/*FeatureSet*/ {}, DFT,
/*BaseII*/ nullptr);
Vector<size_t> Hist(N);
@ -1101,17 +1103,17 @@ TEST(Entropic, ComputeEnergy) {
Vector<std::pair<uint32_t, uint16_t>> FeatureFreqs = {{1, 3}, {2, 3}, {3, 3}};
II->FeatureFreqs = FeatureFreqs;
II->NumExecutedMutations = 0;
II->UpdateEnergy(4);
II->UpdateEnergy(4, false, std::chrono::microseconds(0));
EXPECT_LT(SubAndSquare(II->Energy, 1.450805), Precision);
II->NumExecutedMutations = 9;
II->UpdateEnergy(5);
II->UpdateEnergy(5, false, std::chrono::microseconds(0));
EXPECT_LT(SubAndSquare(II->Energy, 1.525496), Precision);
II->FeatureFreqs[0].second++;
II->FeatureFreqs.push_back(std::pair<uint32_t, uint16_t>(42, 6));
II->NumExecutedMutations = 20;
II->UpdateEnergy(10);
II->UpdateEnergy(10, false, std::chrono::microseconds(0));
EXPECT_LT(SubAndSquare(II->Energy, 1.792831), Precision);
}

View File

@ -0,0 +1,33 @@
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Tests whether scaling the Entropic scheduling weight based on input execution
// time is effective or not. Inputs of size 10 will take at least 100
// microseconds more than any input of size 1-9. The exec-time-scaled Entropic
// scheduling policy should favor the input of size 2 in the corpus over the
// input of size 10, eventually finding the crashing input {0xab, 0xcd} with
// fewer executions.
#include <chrono>
#include <cstdint>
#include <thread>
// Written on the slow path so that path has an observable side effect.
static volatile int Sink;
// Null pointer to a volatile int; storing through it triggers the crash.
static volatile int *Nil = nullptr;

// Fuzz target used to exercise execution-time-aware scheduling.
//   * Size == 10: sleeps for 100 microseconds, making such inputs slow.
//   * Size == 2 with bytes {0xab, 0xcd}: writes through a null pointer.
//   * Any other size (including everything above 10): returns immediately.
// Always returns 0 when it returns at all.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
  switch (Size) {
  case 10: {
    // Deliberately slow: much more expensive than the crashing 2-byte input.
    const std::chrono::microseconds Delay(100);
    std::this_thread::sleep_for(Delay);
    Sink = 0;
    break;
  }
  case 2:
    if (Data[0] == 0xab && Data[1] == 0xcd)
      *Nil = 42; // Intentional crash.
    break;
  default:
    // Other sizes do no work; oversized inputs are ignored to keep runs fast.
    break;
  }
  return 0;
}

View File

@ -0,0 +1,8 @@
REQUIRES: linux, x86_64
RUN: %cpp_compiler %S/EntropicScalePerExecTimeTest.cpp -o %t-EntropicScalePerExecTimeTest
RUN: not %run %t-EntropicScalePerExecTimeTest -entropic=1 -entropic_scale_per_exec_time=1 -seed=1 -runs=100000 -max_len=10
# The following test is added as a comment here for reference, which should
# take more runs than with -entropic_scale_per_exec_time=1 to find the crash.
# (it takes 126,633 runs)
# RUN: not %run %t-EntropicScalePerExecTimeTest -entropic=1 -seed=1 -runs=200000 -max_len=10