//===-- Benchmark ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "JSON.h"
#include "LibcBenchmark.h"
#include "LibcMemoryBenchmark.h"
#include "MemorySizeDistributions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

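// The memory functions under test are provided by LLVM libc; they are
// declared here so the benchmark can call them directly.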
namespace __llvm_libc {

extern void *memcpy(void *__restrict, const void *__restrict, size_t);
extern void *memset(void *, int, size_t);

} // namespace __llvm_libc

namespace llvm {
namespace libc_benchmarks {

enum Function { memcpy, memset };

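// Command line flags controlling the benchmark.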
static cl::opt<std::string>
    StudyName("study-name", cl::desc("The name for this study"), cl::Required);

static cl::opt<Function>
    MemoryFunction("function", cl::desc("Sets the function to benchmark:"),
                   cl::values(clEnumVal(memcpy, "__llvm_libc::memcpy"),
                              clEnumVal(memset, "__llvm_libc::memset")),
                   cl::Required);

static cl::opt<std::string>
    SizeDistributionName("size-distribution-name",
                         cl::desc("The name of the distribution to use"));

static cl::opt<bool>
    SweepMode("sweep-mode",
              cl::desc("If set, benchmark all sizes from 0 to sweep-max-size"));

static cl::opt<uint32_t>
    SweepMaxSize("sweep-max-size",
                 cl::desc("The maximum size to use in sweep-mode"),
                 cl::init(256));

static cl::opt<uint32_t>
    AlignedAccess("aligned-access",
                  cl::desc("The alignment to use when accessing the buffers\n"
                           "Default is unaligned\n"
                           "Use 0 to disable address randomization"),
                  cl::init(1));

static cl::opt<std::string> Output("output",
                                   cl::desc("Specify output filename"),
                                   cl::value_desc("filename"), cl::init("-"));

static cl::opt<uint32_t>
    NumTrials("num-trials",
              cl::desc("The number of benchmark runs to perform"),
              cl::init(1));

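// L1 budget: ParameterStorageBytes is reserved for the parameter batch and
// L1LeftAsideBytes is left unused; the remainder of the L1 data cache holds
// the benchmark buffers (see MemfunctionBenchmark below).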
static constexpr int64_t KiB = 1024;
static constexpr int64_t ParameterStorageBytes = 4 * KiB;
static constexpr int64_t L1LeftAsideBytes = 1 * KiB;

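// Offset and size for a single call to the function under test, packed into
// 32 bits so a batch of parameters stays small.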
struct ParameterType {
  unsigned OffsetBytes : 16; // max : 64 KiB - 1 (16-bit field)
  unsigned SizeBytes : 16;   // max : 64 KiB - 1 (16-bit field)
};

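// memcpy specialization: a source and a destination buffer, and a functor
// performing one call at the sampled offset and size.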
struct MemcpyBenchmark {
  static constexpr auto GetDistributions = &getMemcpySizeDistributions;
  static constexpr size_t BufferCount = 2;
  static void amend(Study &S) { S.Configuration.Function = "memcpy"; }

  MemcpyBenchmark(const size_t BufferSize)
      : SrcBuffer(BufferSize), DstBuffer(BufferSize) {}

  inline auto functor() {
    return [this](ParameterType P) {
      __llvm_libc::memcpy(DstBuffer + P.OffsetBytes, SrcBuffer + P.OffsetBytes,
                          P.SizeBytes);
      return DstBuffer + P.OffsetBytes;
    };
  }

  AlignedBuffer SrcBuffer;
  AlignedBuffer DstBuffer;
};

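// memset specialization: a single destination buffer; the byte value written
// is derived from the offset (P.OffsetBytes & 0xFF).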
struct MemsetBenchmark {
  static constexpr auto GetDistributions = &getMemsetSizeDistributions;
  static constexpr size_t BufferCount = 1;
  static void amend(Study &S) { S.Configuration.Function = "memset"; }

  MemsetBenchmark(const size_t BufferSize) : DstBuffer(BufferSize) {}

  inline auto functor() {
    return [this](ParameterType P) {
      __llvm_libc::memset(DstBuffer + P.OffsetBytes, P.OffsetBytes & 0xFF,
                          P.SizeBytes);
      return DstBuffer + P.OffsetBytes;
    };
  }

  AlignedBuffer DstBuffer;
};

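// Couples a Benchmark (buffers and functor) with the samplers that generate
// batches of (offset, size) parameters, rejecting any call that would
// overflow the buffer.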
template <typename Benchmark> struct Harness : Benchmark {
  using Benchmark::functor;

  Harness(const size_t BufferSize, size_t BatchParameterCount,
          std::function<unsigned()> SizeSampler,
          std::function<unsigned()> OffsetSampler)
      : Benchmark(BufferSize), BufferSize(BufferSize),
        BatchParameterCount(BatchParameterCount),
        Parameters(BatchParameterCount), SizeSampler(SizeSampler),
        OffsetSampler(OffsetSampler) {}

  CircularArrayRef<ParameterType> generateBatch(size_t Iterations) {
    for (auto &P : Parameters) {
      P.OffsetBytes = OffsetSampler();
      P.SizeBytes = SizeSampler();
      if (P.OffsetBytes + P.SizeBytes >= BufferSize)
        report_fatal_error("Call would result in buffer overflow");
    }
    return cycle(makeArrayRef(Parameters), Iterations);
  }

private:
  const size_t BufferSize;
  const size_t BatchParameterCount;
  std::vector<ParameterType> Parameters;
  std::function<unsigned()> SizeSampler;
  std::function<unsigned()> OffsetSampler;
};

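// Type-erased benchmark interface so the driver can run either function.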
struct IBenchmark {
  virtual ~IBenchmark() {}
  virtual Study run() = 0;
};

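// Returns the size in bytes of the host's L1 data cache, aborting if it
// cannot be determined.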
size_t getL1DataCacheSize() {
  const std::vector<CacheInfo> &CacheInfos = HostState::get().Caches;
  const auto IsL1DataCache = [](const CacheInfo &CI) {
    return CI.Type == "Data" && CI.Level == 1;
  };
  const auto CacheIt = find_if(CacheInfos, IsL1DataCache);
  if (CacheIt != CacheInfos.end())
    return CacheIt->Size;
  report_fatal_error("Unable to read L1 data cache size");
}

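// Drives the benchmark for one memory function. Buffer and parameter batch
// sizes are derived from the host's L1 data cache so the working set stays
// cache resident.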
template <typename Benchmark> struct MemfunctionBenchmark : IBenchmark {
  MemfunctionBenchmark(int64_t L1Size = getL1DataCacheSize())
      : AvailableSize(L1Size - L1LeftAsideBytes - ParameterStorageBytes),
        BufferSize(AvailableSize / Benchmark::BufferCount),
        BatchParameterCount(BufferSize / sizeof(ParameterType)) {
    // Validate sizes and command line flags.
    if (AvailableSize <= 0 || BufferSize <= 0 || BatchParameterCount < 100)
      report_fatal_error("Not enough L1 cache");

    if (!isPowerOfTwoOrZero(AlignedAccess))
      report_fatal_error(AlignedAccess.ArgStr +
                         Twine(" must be a power of two or zero"));

    const bool HasDistributionName = !SizeDistributionName.empty();
    if (SweepMode && HasDistributionName)
      report_fatal_error("Select only one of `--" + Twine(SweepMode.ArgStr) +
                         "` or `--" + Twine(SizeDistributionName.ArgStr) + "`");

    if (SweepMode) {
      MaxSizeValue = SweepMaxSize;
    } else {
      std::map<StringRef, MemorySizeDistribution> Map;
      for (MemorySizeDistribution Distribution : Benchmark::GetDistributions())
        Map[Distribution.Name] = Distribution;
      if (Map.count(SizeDistributionName) == 0) {
        std::string Message;
        raw_string_ostream Stream(Message);
        Stream << "Unknown --" << SizeDistributionName.ArgStr << "='"
               << SizeDistributionName << "', available distributions:\n";
        for (const auto &Pair : Map)
          Stream << "'" << Pair.first << "'\n";
        report_fatal_error(Stream.str());
      }
      SizeDistribution = Map[SizeDistributionName];
      MaxSizeValue = SizeDistribution.Probabilities.size() - 1;
    }

    // Set up the study.
    Study.StudyName = StudyName;
    Runtime &RI = Study.Runtime;
    RI.Host = HostState::get();
    RI.BufferSize = BufferSize;
    RI.BatchParameterCount = BatchParameterCount;

    BenchmarkOptions &BO = RI.BenchmarkOptions;
    BO.MinDuration = std::chrono::milliseconds(1);
    BO.MaxDuration = std::chrono::seconds(1);
    BO.MaxIterations = 10'000'000U;
    BO.MinSamples = 4;
    BO.MaxSamples = 1000;
    BO.Epsilon = 0.01; // 1%
    BO.ScalingFactor = 1.4;

    StudyConfiguration &SC = Study.Configuration;
    SC.NumTrials = NumTrials;
    SC.IsSweepMode = SweepMode;
    if (SweepMode)
      SC.SweepModeMaxSize = SweepMaxSize;
    else
      SC.SizeDistributionName = SizeDistributionName;
    SC.AccessAlignment = MaybeAlign(AlignedAccess);

    // Delegate specific flags and configuration.
    Benchmark::amend(Study);
  }

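  // Runs the configured mode and returns the study with its measurements.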
  Study run() override {
    if (SweepMode)
      runSweepMode();
    else
      runDistributionMode();
    return Study;
  }

private:
  const int64_t AvailableSize;
  const int64_t BufferSize;
  const size_t BatchParameterCount;
  size_t MaxSizeValue = 0;
  MemorySizeDistribution SizeDistribution;
  Study Study;
  std::mt19937_64 Gen;

  static constexpr bool isPowerOfTwoOrZero(size_t Value) {
    return (Value & (Value - 1U)) == 0;
  }

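  // Returns a sampler producing random offsets into the buffer, compatible
  // with the requested access alignment.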
  std::function<unsigned()> getOffsetSampler() {
    return [this]() {
      static OffsetDistribution OD(BufferSize, MaxSizeValue,
                                   Study.Configuration.AccessAlignment);
      return OD(Gen);
    };
  }

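  // Returns a sampler drawing sizes according to the selected distribution.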
  std::function<unsigned()> getSizeSampler() {
    return [this]() {
      static std::discrete_distribution<unsigned> Distribution(
          SizeDistribution.Probabilities.begin(),
          SizeDistribution.Probabilities.end());
      return Distribution(Gen);
    };
  }

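  // Displays a text progress bar on stderr based on how many measurements
  // have been collected so far.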
  void reportProgress(BenchmarkStatus BS) {
    const size_t TotalSteps = Study.Measurements.capacity();
    const size_t Steps = Study.Measurements.size();
    const size_t Percent = 100 * Steps / TotalSteps;
    size_t I = 0;
    errs() << '[';
    for (; I <= Percent; ++I)
      errs() << '#';
    for (; I <= 100; ++I)
      errs() << '_';
    errs() << "] " << Percent << "%\r";
  }

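  // Runs NumTrials measurements with the given samplers and records each
  // run's best guess.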
  void runTrials(const BenchmarkOptions &Options,
                 std::function<unsigned()> SizeSampler,
                 std::function<unsigned()> OffsetSampler) {
    Harness<Benchmark> B(BufferSize, BatchParameterCount, SizeSampler,
                         OffsetSampler);
    for (size_t i = 0; i < NumTrials; ++i) {
      const BenchmarkResult Result = benchmark(Options, B, B.functor());
      Study.Measurements.push_back(Result.BestGuess);
      reportProgress(Result.TerminationStatus);
    }
  }

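  // Sweep mode: measures every size from 0 to sweep-max-size, NumTrials
  // times each.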
  void runSweepMode() {
    Study.Measurements.reserve(NumTrials * SweepMaxSize);

    BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions;
    BO.MinDuration = std::chrono::milliseconds(1);
    BO.InitialIterations = 100;

    for (size_t Size = 0; Size <= SweepMaxSize; ++Size) {
      const auto SizeSampler = [Size]() { return Size; };
      runTrials(BO, SizeSampler, getOffsetSampler());
    }
  }

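  // Distribution mode: sizes are drawn from the named size distribution.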
  void runDistributionMode() {
    Study.Measurements.reserve(NumTrials);

    BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions;
    BO.MinDuration = std::chrono::milliseconds(10);
    BO.InitialIterations = BatchParameterCount * 10;

    runTrials(BO, getSizeSampler(), getOffsetSampler());
  }
};

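// Instantiates the benchmark matching the --function flag.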
std::unique_ptr<IBenchmark> getMemfunctionBenchmark() {
  switch (MemoryFunction) {
  case memcpy:
    return std::make_unique<MemfunctionBenchmark<MemcpyBenchmark>>();
  case memset:
    return std::make_unique<MemfunctionBenchmark<MemsetBenchmark>>();
  }
  llvm_unreachable("Unknown MemoryFunction");
}

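// Serializes the study as JSON to the --output file ("-" writes to stdout).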
void writeStudy(const Study &S) {
  std::error_code EC;
  raw_fd_ostream FOS(Output, EC);
  if (EC)
    report_fatal_error(Twine("Could not open file: ")
                           .concat(EC.message())
                           .concat(", ")
                           .concat(Output));
  json::OStream JOS(FOS);
  serializeToJson(S, JOS);
}

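// Runs the selected benchmark and writes out the resulting study.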
void main() {
  checkRequirements();
  auto MB = getMemfunctionBenchmark();
  writeStudy(MB->run());
}

} // namespace libc_benchmarks
} // namespace llvm

int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv);
#ifndef NDEBUG
  // Deliberately fail to build in debug mode: timings from unoptimized
  // binaries are not reproducible.
  static_assert(
      false,
      "For reproducibility, benchmarks should not be compiled in DEBUG mode.");
#endif
  llvm::libc_benchmarks::main();
  return EXIT_SUCCESS;
}