[libc] Allow benchmarking several implementations at the same time.

Next step is to generate an archive with all implementations and a header listing them all.

Differential Revision: https://reviews.llvm.org/D107336
This commit is contained in:
Guillaume Chatelet 2021-08-03 10:42:28 +00:00
parent 559426ae76
commit e4dee76224
3 changed files with 122 additions and 76 deletions

View File

@ -104,11 +104,11 @@ void ParameterBatch::checkValid(const ParameterType &P) const {
.concat(llvm::Twine(BufferSize))); .concat(llvm::Twine(BufferSize)));
} }
CopyHarness::CopyHarness() CopySetup::CopySetup()
: ParameterBatch(2), SrcBuffer(ParameterBatch::BufferSize), : ParameterBatch(2), SrcBuffer(ParameterBatch::BufferSize),
DstBuffer(ParameterBatch::BufferSize) {} DstBuffer(ParameterBatch::BufferSize) {}
ComparisonHarness::ComparisonHarness() ComparisonSetup::ComparisonSetup()
: ParameterBatch(2), LhsBuffer(ParameterBatch::BufferSize), : ParameterBatch(2), LhsBuffer(ParameterBatch::BufferSize),
RhsBuffer(ParameterBatch::BufferSize) { RhsBuffer(ParameterBatch::BufferSize) {
// The memcmp buffers always compare equal. // The memcmp buffers always compare equal.
@ -116,7 +116,7 @@ ComparisonHarness::ComparisonHarness()
memset(RhsBuffer.begin(), 0xF, BufferSize); memset(RhsBuffer.begin(), 0xF, BufferSize);
} }
SetHarness::SetHarness() SetSetup::SetSetup()
: ParameterBatch(1), DstBuffer(ParameterBatch::BufferSize) {} : ParameterBatch(1), DstBuffer(ParameterBatch::BufferSize) {}
} // namespace libc_benchmarks } // namespace libc_benchmarks

View File

@ -185,19 +185,43 @@ struct ParameterBatch {
std::vector<ParameterType> Parameters; std::vector<ParameterType> Parameters;
}; };
/// Memory function prototype and configuration.
/// Function pointer type matching the memcpy signature: destination, source,
/// and byte count, returning a pointer.
using MemcpyFunction = void *(*)(void *__restrict, const void *__restrict,
size_t);
/// Pairs one memcpy implementation with a name identifying it, so that
/// several implementations can be listed and benchmarked side by side.
struct MemcpyConfiguration {
/// The implementation to call.
MemcpyFunction Function;
/// Name identifying this implementation.
llvm::StringRef Name;
};
/// Function pointer type matching the memset signature: destination, fill
/// value, and byte count, returning a pointer.
using MemsetFunction = void *(*)(void *, int, size_t);
/// Pairs one memset implementation with a name identifying it.
struct MemsetConfiguration {
/// The implementation to call.
MemsetFunction Function;
/// Name identifying this implementation.
llvm::StringRef Name;
};
/// Function pointer type matching the bzero signature: destination and byte
/// count, with no return value.
using BzeroFunction = void (*)(void *, size_t);
/// Pairs one bzero implementation with a name identifying it.
struct BzeroConfiguration {
/// The implementation to call.
BzeroFunction Function;
/// Name identifying this implementation.
llvm::StringRef Name;
};
/// Function pointer type matching the memcmp signature: two buffers and a
/// byte count, returning an int.
using MemcmpFunction = int (*)(const void *, const void *, size_t);
/// Pairs one memcmp implementation with a name identifying it.
struct MemcmpConfiguration {
/// The implementation to call.
MemcmpFunction Function;
/// Name identifying this implementation.
llvm::StringRef Name;
};
/// Provides source and destination buffers for the Copy operation as well as /// Provides source and destination buffers for the Copy operation as well as
/// the associated size distributions. /// the associated size distributions.
struct CopyHarness : public ParameterBatch { struct CopySetup : public ParameterBatch {
CopyHarness(); CopySetup();
inline static const ArrayRef<MemorySizeDistribution> getDistributions() { inline static const ArrayRef<MemorySizeDistribution> getDistributions() {
return getMemcpySizeDistributions(); return getMemcpySizeDistributions();
} }
inline void *Call(ParameterType Parameter, inline void *Call(ParameterType Parameter, MemcpyFunction Memcpy) {
void *(*memcpy)(void *__restrict, const void *__restrict, return Memcpy(DstBuffer + Parameter.OffsetBytes,
size_t)) {
return memcpy(DstBuffer + Parameter.OffsetBytes,
SrcBuffer + Parameter.OffsetBytes, Parameter.SizeBytes); SrcBuffer + Parameter.OffsetBytes, Parameter.SizeBytes);
} }
@ -208,21 +232,20 @@ private:
/// Provides destination buffer for the Set operation as well as the associated /// Provides destination buffer for the Set operation as well as the associated
/// size distributions. /// size distributions.
struct SetHarness : public ParameterBatch { struct SetSetup : public ParameterBatch {
SetHarness(); SetSetup();
inline static const ArrayRef<MemorySizeDistribution> getDistributions() { inline static const ArrayRef<MemorySizeDistribution> getDistributions() {
return getMemsetSizeDistributions(); return getMemsetSizeDistributions();
} }
inline void *Call(ParameterType Parameter, inline void *Call(ParameterType Parameter, MemsetFunction Memset) {
void *(*memset)(void *, int, size_t)) { return Memset(DstBuffer + Parameter.OffsetBytes,
return memset(DstBuffer + Parameter.OffsetBytes,
Parameter.OffsetBytes % 0xFF, Parameter.SizeBytes); Parameter.OffsetBytes % 0xFF, Parameter.SizeBytes);
} }
inline void *Call(ParameterType Parameter, void (*bzero)(void *, size_t)) { inline void *Call(ParameterType Parameter, BzeroFunction Bzero) {
bzero(DstBuffer + Parameter.OffsetBytes, Parameter.SizeBytes); Bzero(DstBuffer + Parameter.OffsetBytes, Parameter.SizeBytes);
return DstBuffer.begin(); return DstBuffer.begin();
} }
@ -232,16 +255,15 @@ private:
/// Provides left and right buffers for the Comparison operation as well as the /// Provides left and right buffers for the Comparison operation as well as the
/// associated size distributions. /// associated size distributions.
struct ComparisonHarness : public ParameterBatch { struct ComparisonSetup : public ParameterBatch {
ComparisonHarness(); ComparisonSetup();
inline static const ArrayRef<MemorySizeDistribution> getDistributions() { inline static const ArrayRef<MemorySizeDistribution> getDistributions() {
return getMemcmpSizeDistributions(); return getMemcmpSizeDistributions();
} }
inline int Call(ParameterType Parameter, inline int Call(ParameterType Parameter, MemcmpFunction Memcmp) {
int (*memcmp)(const void *, const void *, size_t)) { return Memcmp(LhsBuffer + Parameter.OffsetBytes,
return memcmp(LhsBuffer + Parameter.OffsetBytes,
RhsBuffer + Parameter.OffsetBytes, Parameter.SizeBytes); RhsBuffer + Parameter.OffsetBytes, Parameter.SizeBytes);
} }

View File

@ -2,10 +2,26 @@
#include "LibcMemoryBenchmark.h" #include "LibcMemoryBenchmark.h"
#include "MemorySizeDistributions.h" #include "MemorySizeDistributions.h"
#include "benchmark/benchmark.h" #include "benchmark/benchmark.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Twine.h"
#include <chrono>
#include <cstdint> #include <cstdint>
#include <random> #include <random>
#include <vector> #include <vector>
using llvm::Align;
using llvm::ArrayRef;
using llvm::Twine;
using llvm::libc_benchmarks::BzeroConfiguration;
using llvm::libc_benchmarks::ComparisonSetup;
using llvm::libc_benchmarks::CopySetup;
using llvm::libc_benchmarks::MemcmpConfiguration;
using llvm::libc_benchmarks::MemcpyConfiguration;
using llvm::libc_benchmarks::MemorySizeDistribution;
using llvm::libc_benchmarks::MemsetConfiguration;
using llvm::libc_benchmarks::OffsetDistribution;
using llvm::libc_benchmarks::SetSetup;
namespace __llvm_libc { namespace __llvm_libc {
extern void *memcpy(void *__restrict, const void *__restrict, size_t); extern void *memcpy(void *__restrict, const void *__restrict, size_t);
@ -15,84 +31,92 @@ extern int memcmp(const void *, const void *, size_t);
} // namespace __llvm_libc } // namespace __llvm_libc
using llvm::Align; // List of implementations to test.
using llvm::ArrayRef; static constexpr MemcpyConfiguration kMemcpyConfigurations[] = {
using llvm::libc_benchmarks::ComparisonHarness; {__llvm_libc::memcpy, "__llvm_libc::memcpy"}};
using llvm::libc_benchmarks::CopyHarness;
using llvm::libc_benchmarks::MemorySizeDistribution;
using llvm::libc_benchmarks::OffsetDistribution;
using llvm::libc_benchmarks::SetHarness;
// memcmp implementations to benchmark; each entry is {function, name}.
static constexpr MemcmpConfiguration kMemcmpConfigurations[] = {
{__llvm_libc::memcmp, "__llvm_libc::memcmp"}};
// memset implementations to benchmark; each entry is {function, name}.
static constexpr MemsetConfiguration kMemsetConfigurations[] = {
{__llvm_libc::memset, "__llvm_libc::memset"}};
// bzero implementations to benchmark; each entry is {function, name}.
static constexpr BzeroConfiguration kBzeroConfigurations[] = {
{__llvm_libc::bzero, "__llvm_libc::bzero"}};
// Alignment to use when accessing the buffers.
static constexpr Align kBenchmarkAlignment = Align::Constant<1>(); static constexpr Align kBenchmarkAlignment = Align::Constant<1>();
template <typename Harness> struct Randomized : public Harness { static std::mt19937_64 &getGenerator() {
Randomized(benchmark::State &State) static std::mt19937_64 Generator(
: State(State), Distribution(Harness::getDistributions()[State.range(0)]), std::chrono::system_clock::now().time_since_epoch().count());
return Generator;
}
template <typename SetupType, typename ConfigurationType> struct Runner {
Runner(benchmark::State &S, llvm::ArrayRef<ConfigurationType> Configurations)
: State(S), Distribution(SetupType::getDistributions()[State.range(0)]),
Probabilities(Distribution.Probabilities), Probabilities(Distribution.Probabilities),
SizeSampler(Probabilities.begin(), Probabilities.end()), SizeSampler(Probabilities.begin(), Probabilities.end()),
OffsetSampler(Harness::BufferSize, Probabilities.size() - 1, OffsetSampler(Setup.BufferSize, Probabilities.size() - 1,
kBenchmarkAlignment) { kBenchmarkAlignment),
for (auto &P : Harness::Parameters) { Configuration(Configurations[State.range(1)]) {
P.OffsetBytes = OffsetSampler(Gen); for (auto &P : Setup.Parameters) {
P.SizeBytes = SizeSampler(Gen); P.OffsetBytes = OffsetSampler(getGenerator());
Harness::checkValid(P); P.SizeBytes = SizeSampler(getGenerator());
Setup.checkValid(P);
} }
} }
~Randomized() { ~Runner() {
const size_t AvgBytesPerIteration = const size_t AvgBytesPerIteration = Setup.getBatchBytes() / Setup.BatchSize;
Harness::getBatchBytes() / Harness::BatchSize;
const size_t TotalBytes = State.iterations() * AvgBytesPerIteration; const size_t TotalBytes = State.iterations() * AvgBytesPerIteration;
State.SetBytesProcessed(TotalBytes); State.SetBytesProcessed(TotalBytes);
State.SetLabel(Distribution.Name.str()); State.SetItemsProcessed(State.iterations());
State.SetLabel((Twine(Configuration.Name) + "," + Distribution.Name).str());
State.counters["bytes_per_cycle"] = benchmark::Counter( State.counters["bytes_per_cycle"] = benchmark::Counter(
TotalBytes / benchmark::CPUInfo::Get().cycles_per_second, TotalBytes / benchmark::CPUInfo::Get().cycles_per_second,
benchmark::Counter::kIsRate); benchmark::Counter::kIsRate);
} }
template <typename Function> inline void runBatch(Function foo) { inline void runBatch() {
for (const auto &P : Harness::Parameters) for (const auto &P : Setup.Parameters)
benchmark::DoNotOptimize(Harness::Call(P, foo)); benchmark::DoNotOptimize(Setup.Call(P, Configuration.Function));
} }
size_t getBatchSize() const { return Setup.BatchSize; }
private: private:
SetupType Setup;
benchmark::State &State; benchmark::State &State;
Harness UP;
MemorySizeDistribution Distribution; MemorySizeDistribution Distribution;
ArrayRef<double> Probabilities; ArrayRef<double> Probabilities;
std::discrete_distribution<unsigned> SizeSampler; std::discrete_distribution<unsigned> SizeSampler;
OffsetDistribution OffsetSampler; OffsetDistribution OffsetSampler;
std::mt19937_64 Gen; ConfigurationType Configuration;
}; };
template <typename Harness> static int64_t getMaxIndex() { #define BENCHMARK_MEMORY_FUNCTION(BM_NAME, SETUP, CONFIGURATION_TYPE, \
return Harness::getDistributions().size() - 1; CONFIGURATION_ARRAY_REF) \
} void BM_NAME(benchmark::State &State) { \
Runner<SETUP, CONFIGURATION_TYPE> Setup(State, CONFIGURATION_ARRAY_REF); \
const size_t BatchSize = Setup.getBatchSize(); \
while (State.KeepRunningBatch(BatchSize)) \
Setup.runBatch(); \
} \
BENCHMARK(BM_NAME)->Apply([](benchmark::internal::Benchmark *benchmark) { \
const int64_t DistributionSize = SETUP::getDistributions().size(); \
const int64_t ConfigurationSize = CONFIGURATION_ARRAY_REF.size(); \
for (int64_t DistIndex = 0; DistIndex < DistributionSize; ++DistIndex) \
for (int64_t ConfIndex = 0; ConfIndex < ConfigurationSize; ++ConfIndex) \
benchmark->Args({DistIndex, ConfIndex}); \
})
void BM_Memcpy(benchmark::State &State) { BENCHMARK_MEMORY_FUNCTION(BM_Memcpy, CopySetup, MemcpyConfiguration,
Randomized<CopyHarness> Harness(State); llvm::makeArrayRef(kMemcpyConfigurations));
while (State.KeepRunningBatch(Harness.BatchSize)) BENCHMARK_MEMORY_FUNCTION(BM_Memcmp, ComparisonSetup, MemcmpConfiguration,
Harness.runBatch(__llvm_libc::memcpy); llvm::makeArrayRef(kMemcmpConfigurations));
} BENCHMARK_MEMORY_FUNCTION(BM_Memset, SetSetup, MemsetConfiguration,
BENCHMARK(BM_Memcpy)->DenseRange(0, getMaxIndex<CopyHarness>()); llvm::makeArrayRef(kMemsetConfigurations));
BENCHMARK_MEMORY_FUNCTION(BM_Bzero, SetSetup, BzeroConfiguration,
void BM_Memcmp(benchmark::State &State) { llvm::makeArrayRef(kBzeroConfigurations));
Randomized<ComparisonHarness> Harness(State);
while (State.KeepRunningBatch(Harness.BatchSize))
Harness.runBatch(__llvm_libc::memcmp);
}
BENCHMARK(BM_Memcmp)->DenseRange(0, getMaxIndex<ComparisonHarness>());
void BM_Memset(benchmark::State &State) {
Randomized<SetHarness> Harness(State);
while (State.KeepRunningBatch(Harness.BatchSize))
Harness.runBatch(__llvm_libc::memset);
}
BENCHMARK(BM_Memset)->DenseRange(0, getMaxIndex<SetHarness>());
void BM_Bzero(benchmark::State &State) {
Randomized<SetHarness> Harness(State);
while (State.KeepRunningBatch(Harness.BatchSize))
Harness.runBatch(__llvm_libc::bzero);
}
BENCHMARK(BM_Bzero)->DenseRange(0, getMaxIndex<SetHarness>());