// llvm-project/libc/benchmarks/LibcMemoryBenchmarkMain.cpp
//
// 354 lines
// 11 KiB
// C++

//===-- Benchmark ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "JSON.h"
#include "LibcBenchmark.h"
#include "LibcMemoryBenchmark.h"
#include "MemorySizeDistributions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
// Declarations of the two functions that this driver benchmarks. They are only
// declared here (`extern`); the definitions must be supplied by another
// translation unit or library at link time.
namespace __llvm_libc {
// Same parameter list as the standard memcpy: destination, source, byte count.
extern void *memcpy(void *__restrict, const void *__restrict, size_t);
// Same parameter list as the standard memset: destination, fill value, byte
// count.
extern void *memset(void *, int, size_t);
} // namespace __llvm_libc
namespace llvm {
namespace libc_benchmarks {
// Identifies which function to benchmark. The enumerator names are also the
// values accepted by the `--function` flag (they are registered with
// clEnumVal), and they select the benchmark in getMemfunctionBenchmark().
enum Function { memcpy, memset };
// --study-name (required): stored verbatim in the resulting Study.
static cl::opt<std::string>
StudyName("study-name", cl::desc("The name for this study"), cl::Required);
// --function (required): selects which memory function is benchmarked.
static cl::opt<Function>
MemoryFunction("function", cl::desc("Sets the function to benchmark:"),
cl::values(clEnumVal(memcpy, "__llvm_libc::memcpy"),
clEnumVal(memset, "__llvm_libc::memset")),
cl::Required);
// --size-distribution-name: name of the size distribution to sample from.
// Mutually exclusive with --sweep-mode (enforced in MemfunctionBenchmark).
static cl::opt<std::string>
SizeDistributionName("size-distribution-name",
cl::desc("The name of the distribution to use"));
// --sweep-mode: benchmark each size from 0 up to --sweep-max-size instead of
// sampling sizes from a distribution.
static cl::opt<bool>
SweepMode("sweep-mode",
cl::desc("If set, benchmark all sizes from 0 to sweep-max-size"));
// --sweep-max-size: largest size used in sweep mode; defaults to 256.
static cl::opt<uint32_t>
SweepMaxSize("sweep-max-size",
cl::desc("The maximum size to use in sweep-mode"),
cl::init(256));
// --aligned-access: alignment of the buffer accesses; defaults to 1
// (unaligned). MemfunctionBenchmark rejects values that are neither zero nor a
// power of two.
static cl::opt<uint32_t>
AlignedAccess("aligned-access",
cl::desc("The alignment to use when accessing the buffers\n"
"Default is unaligned\n"
"Use 0 to disable address randomization"),
cl::init(1));
// --output: file the JSON study is written to; defaults to "-".
static cl::opt<std::string> Output("output",
cl::desc("Specify output filename"),
cl::value_desc("filename"), cl::init("-"));
// --num-trials: number of benchmark runs per configuration; defaults to 1.
static cl::opt<uint32_t>
NumTrials("num-trials", cl::desc("The number of benchmarks run to perform"),
cl::init(1));
// Number of bytes in one kibibyte.
static constexpr int64_t KiB = 1024;
// The next two constants are subtracted from the L1 data cache size when
// MemfunctionBenchmark computes how many bytes are available for the buffers.
// Bytes set aside for the batch of benchmark parameters.
static constexpr int64_t ParameterStorageBytes = 4 * KiB;
// Bytes of L1 deliberately left unused by the benchmark.
static constexpr int64_t L1LeftAsideBytes = 1 * KiB;
// Arguments for a single benchmarked call: where in the buffer(s) the call
// operates and how many bytes it touches. Both members are 16-bit bit-fields,
// so each can represent values in [0, 65535]; larger values assigned to them
// are truncated.
struct ParameterType {
unsigned OffsetBytes : 16; // max : 64 KiB - 1
unsigned SizeBytes : 16; // max : 64 KiB - 1
};
/// Benchmark policy for `__llvm_libc::memcpy`: owns a source and a destination
/// buffer and exposes the callable that is measured.
struct MemcpyBenchmark {
  /// Provider of the size distributions available for memcpy.
  static constexpr auto GetDistributions = &getMemcpySizeDistributions;
  /// memcpy needs two buffers (source and destination).
  static constexpr size_t BufferCount = 2;

  /// Records the benchmarked function name in the study configuration.
  static void amend(Study &S) { S.Configuration.Function = "memcpy"; }

  /// Creates the source and destination buffers, each built from `BufferSize`.
  MemcpyBenchmark(const size_t BufferSize)
      : SrcBuffer(BufferSize), DstBuffer(BufferSize) {}

  /// Returns the callable to measure. It copies `P.SizeBytes` bytes from the
  /// source to the destination buffer, both at offset `P.OffsetBytes`, and
  /// returns the destination address that was written.
  auto functor() {
    return [this](ParameterType P) {
      const auto Destination = DstBuffer + P.OffsetBytes;
      const auto Source = SrcBuffer + P.OffsetBytes;
      __llvm_libc::memcpy(Destination, Source, P.SizeBytes);
      return Destination;
    };
  }

  AlignedBuffer SrcBuffer;
  AlignedBuffer DstBuffer;
};
/// Benchmark policy for `__llvm_libc::memset`: owns the destination buffer and
/// exposes the callable that is measured.
struct MemsetBenchmark {
  /// Provider of the size distributions available for memset.
  static constexpr auto GetDistributions = &getMemsetSizeDistributions;
  /// memset only needs a destination buffer.
  static constexpr size_t BufferCount = 1;

  /// Records the benchmarked function name in the study configuration.
  static void amend(Study &S) { S.Configuration.Function = "memset"; }

  /// Creates the destination buffer, built from `BufferSize`.
  MemsetBenchmark(const size_t BufferSize) : DstBuffer(BufferSize) {}

  /// Returns the callable to measure. It fills `P.SizeBytes` bytes of the
  /// destination buffer starting at offset `P.OffsetBytes`, using the low
  /// eight bits of the offset as the fill value, and returns the address that
  /// was written.
  auto functor() {
    return [this](ParameterType P) {
      const auto Destination = DstBuffer + P.OffsetBytes;
      __llvm_libc::memset(Destination, P.OffsetBytes & 0xFF, P.SizeBytes);
      return Destination;
    };
  }

  AlignedBuffer DstBuffer;
};
/// Couples a benchmark policy (`Benchmark`) with the generation of the
/// parameter batches fed to its functor.
template <typename Benchmark> struct Harness : Benchmark {
  using Benchmark::functor;

  /// \param BufferSize forwarded to `Benchmark` and used as the bound when
  ///        validating generated parameters.
  /// \param BatchParameterCount number of parameters held in a batch.
  /// \param SizeSampler produces the size of each call.
  /// \param OffsetSampler produces the buffer offset of each call.
  Harness(const size_t BufferSize, size_t BatchParameterCount,
          std::function<unsigned()> SizeSampler,
          std::function<unsigned()> OffsetSampler)
      : Benchmark(BufferSize), BufferSize(BufferSize),
        BatchParameterCount(BatchParameterCount),
        Parameters(BatchParameterCount), SizeSampler(SizeSampler),
        OffsetSampler(OffsetSampler) {}

  /// Refills every parameter slot with a freshly sampled offset and size, then
  /// returns a view that cycles over the batch for `Iterations` elements.
  /// Aborts with a fatal error as soon as a stored (offset, size) pair does
  /// not end strictly before `BufferSize`.
  CircularArrayRef<ParameterType> generateBatch(size_t Iterations) {
    for (ParameterType &Parameter : Parameters) {
      // The offset is sampled before the size; the samplers may share state.
      Parameter.OffsetBytes = OffsetSampler();
      Parameter.SizeBytes = SizeSampler();
      // The check is performed on the values as stored in the bit-fields.
      const bool StaysInsideBuffer =
          Parameter.OffsetBytes + Parameter.SizeBytes < BufferSize;
      if (!StaysInsideBuffer)
        report_fatal_error("Call would result in buffer overflow");
    }
    const auto Batch = makeArrayRef(Parameters);
    return cycle(Batch, Iterations);
  }

private:
  const size_t BufferSize;
  const size_t BatchParameterCount;
  std::vector<ParameterType> Parameters;
  std::function<unsigned()> SizeSampler;
  std::function<unsigned()> OffsetSampler;
};
/// Type-erased interface over the per-function benchmark implementations.
struct IBenchmark {
  virtual ~IBenchmark() = default;

  /// Runs the benchmark and returns the resulting study.
  virtual Study run() = 0;
};
size_t getL1DataCacheSize() {
const std::vector<CacheInfo> &CacheInfos = HostState::get().Caches;
const auto IsL1DataCache = [](const CacheInfo &CI) {
return CI.Type == "Data" && CI.Level == 1;
};
const auto CacheIt = find_if(CacheInfos, IsL1DataCache);
if (CacheIt != CacheInfos.end())
return CacheIt->Size;
report_fatal_error("Unable to read L1 Cache Data Size");
}
template <typename Benchmark> struct MemfunctionBenchmark : IBenchmark {
MemfunctionBenchmark(int64_t L1Size = getL1DataCacheSize())
: AvailableSize(L1Size - L1LeftAsideBytes - ParameterStorageBytes),
BufferSize(AvailableSize / Benchmark::BufferCount),
BatchParameterCount(BufferSize / sizeof(ParameterType)) {
// Handling command line flags
if (AvailableSize <= 0 || BufferSize <= 0 || BatchParameterCount < 100)
report_fatal_error("Not enough L1 cache");
if (!isPowerOfTwoOrZero(AlignedAccess))
report_fatal_error(AlignedAccess.ArgStr +
Twine(" must be a power of two or zero"));
const bool HasDistributionName = !SizeDistributionName.empty();
if (SweepMode && HasDistributionName)
report_fatal_error("Select only one of `--" + Twine(SweepMode.ArgStr) +
"` or `--" + Twine(SizeDistributionName.ArgStr) + "`");
if (SweepMode) {
MaxSizeValue = SweepMaxSize;
} else {
std::map<StringRef, MemorySizeDistribution> Map;
for (MemorySizeDistribution Distribution : Benchmark::GetDistributions())
Map[Distribution.Name] = Distribution;
if (Map.count(SizeDistributionName) == 0) {
std::string Message;
raw_string_ostream Stream(Message);
Stream << "Unknown --" << SizeDistributionName.ArgStr << "='"
<< SizeDistributionName << "', available distributions:\n";
for (const auto &Pair : Map)
Stream << "'" << Pair.first << "'\n";
report_fatal_error(Stream.str());
}
SizeDistribution = Map[SizeDistributionName];
MaxSizeValue = SizeDistribution.Probabilities.size() - 1;
}
// Setup study.
Study.StudyName = StudyName;
Runtime &RI = Study.Runtime;
RI.Host = HostState::get();
RI.BufferSize = BufferSize;
RI.BatchParameterCount = BatchParameterCount;
BenchmarkOptions &BO = RI.BenchmarkOptions;
BO.MinDuration = std::chrono::milliseconds(1);
BO.MaxDuration = std::chrono::seconds(1);
BO.MaxIterations = 10'000'000U;
BO.MinSamples = 4;
BO.MaxSamples = 1000;
BO.Epsilon = 0.01; // 1%
BO.ScalingFactor = 1.4;
StudyConfiguration &SC = Study.Configuration;
SC.NumTrials = NumTrials;
SC.IsSweepMode = SweepMode;
if (SweepMode)
SC.SweepModeMaxSize = SweepMaxSize;
else
SC.SizeDistributionName = SizeDistributionName;
SC.AccessAlignment = MaybeAlign(AlignedAccess);
// Delegate specific flags and configuration.
Benchmark::amend(Study);
}
Study run() override {
if (SweepMode)
runSweepMode();
else
runDistributionMode();
return Study;
}
private:
const int64_t AvailableSize;
const int64_t BufferSize;
const size_t BatchParameterCount;
size_t MaxSizeValue = 0;
MemorySizeDistribution SizeDistribution;
Study Study;
std::mt19937_64 Gen;
static constexpr bool isPowerOfTwoOrZero(size_t Value) {
return (Value & (Value - 1U)) == 0;
}
std::function<unsigned()> geOffsetSampler() {
return [this]() {
static OffsetDistribution OD(BufferSize, MaxSizeValue,
Study.Configuration.AccessAlignment);
return OD(Gen);
};
}
std::function<unsigned()> getSizeSampler() {
return [this]() {
static std::discrete_distribution<unsigned> Distribution(
SizeDistribution.Probabilities.begin(),
SizeDistribution.Probabilities.end());
return Distribution(Gen);
};
}
void reportProgress(BenchmarkStatus BS) {
const size_t TotalSteps = Study.Measurements.capacity();
const size_t Steps = Study.Measurements.size();
const size_t Percent = 100 * Steps / TotalSteps;
size_t I = 0;
errs() << '[';
for (; I <= Percent; ++I)
errs() << '#';
for (; I <= 100; ++I)
errs() << '_';
errs() << "] " << Percent << "%\r";
}
void runTrials(const BenchmarkOptions &Options,
std::function<unsigned()> SizeSampler,
std::function<unsigned()> OffsetSampler) {
Harness<Benchmark> B(BufferSize, BatchParameterCount, SizeSampler,
OffsetSampler);
for (size_t i = 0; i < NumTrials; ++i) {
const BenchmarkResult Result = benchmark(Options, B, B.functor());
Study.Measurements.push_back(Result.BestGuess);
reportProgress(Result.TerminationStatus);
}
}
void runSweepMode() {
Study.Measurements.reserve(NumTrials * SweepMaxSize);
BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions;
BO.MinDuration = std::chrono::milliseconds(1);
BO.InitialIterations = 100;
for (size_t Size = 0; Size <= SweepMaxSize; ++Size) {
const auto SizeSampler = [Size]() { return Size; };
runTrials(BO, SizeSampler, geOffsetSampler());
}
}
void runDistributionMode() {
Study.Measurements.reserve(NumTrials);
BenchmarkOptions &BO = Study.Runtime.BenchmarkOptions;
BO.MinDuration = std::chrono::milliseconds(10);
BO.InitialIterations = BatchParameterCount * 10;
runTrials(BO, getSizeSampler(), geOffsetSampler());
}
};
std::unique_ptr<IBenchmark> getMemfunctionBenchmark() {
switch (MemoryFunction) {
case memcpy:
return std::make_unique<MemfunctionBenchmark<MemcpyBenchmark>>();
case memset:
return std::make_unique<MemfunctionBenchmark<MemsetBenchmark>>();
}
}
void writeStudy(const Study &S) {
std::error_code EC;
raw_fd_ostream FOS(Output, EC);
if (EC)
report_fatal_error(Twine("Could not open file: ")
.concat(EC.message())
.concat(", ")
.concat(Output));
json::OStream JOS(FOS);
serializeToJson(S, JOS);
}
void main() {
checkRequirements();
auto MB = getMemfunctionBenchmark();
writeStudy(MB->run());
}
} // namespace libc_benchmarks
} // namespace llvm
/// Program entry point: parses the command line and runs the benchmark.
int main(int argc, char **argv) {
  // Compile-time guard: builds without NDEBUG are rejected outright.
#ifndef NDEBUG
  static_assert(
      false,
      "For reproducibility benchmarks should not be compiled in DEBUG mode.");
#endif
  llvm::cl::ParseCommandLineOptions(argc, argv);
  llvm::libc_benchmarks::main();
  return EXIT_SUCCESS;
}