llvm-project/libcxx/benchmarks/ContainerBenchmarks.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

155 lines
4.6 KiB
C
Raw Normal View History

// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef BENCHMARK_CONTAINER_BENCHMARKS_H
#define BENCHMARK_CONTAINER_BENCHMARKS_H
#include <cassert>
#include "Utilities.h"
#include "benchmark/benchmark.h"
namespace ContainerBenchmarks {
Fix PR35637: suboptimal codegen for `vector<unsigned char>`. The optimizer is petulant and temperamental. In this case LLVM failed to lower the the "insert at end" loop used by`vector<unsigned char>` to a `memset` despite `memset` being substantially faster over a range of bytes. LLVM has the ability to lower loops to `memset` whet appropriate, but the odd nature of libc++'s loops prevented the optimization from taking places. This patch addresses the issue by rewriting the loops from the form `do [ ... --__n; } while (__n > 0);` to instead use a for loop over a pointer range (For example: `for (auto *__i = ...; __i < __e; ++__i)`). This patch also rewrites the asan annotations to unposion all additional memory at the start of the loop instead of once per iterations. This could potentially permit false negatives where the constructor of element N attempts to access element N + 1 during its construction. The before and after results for the `BM_ConstructSize/vector_byte/5140480_mean` benchmark (run 5 times) are: -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- Before ------ BM_ConstructSize/vector_byte/5140480_mean 12530140 ns 12469693 ns N/A BM_ConstructSize/vector_byte/5140480_median 12512818 ns 12445571 ns N/A BM_ConstructSize/vector_byte/5140480_stddev 106224 ns 107907 ns 5 ----- After ----- BM_ConstructSize/vector_byte/5140480_mean 167285 ns 166500 ns N/A BM_ConstructSize/vector_byte/5140480_median 166749 ns 166069 ns N/A BM_ConstructSize/vector_byte/5140480_stddev 3242 ns 3184 ns 5 llvm-svn: 367183
2019-07-28 12:37:02 +08:00
template <class Container>
void BM_ConstructSize(benchmark::State& st, Container) {
auto size = st.range(0);
for (auto _ : st) {
Container c(size);
DoNotOptimizeData(c);
Fix PR35637: suboptimal codegen for `vector<unsigned char>`. The optimizer is petulant and temperamental. In this case LLVM failed to lower the the "insert at end" loop used by`vector<unsigned char>` to a `memset` despite `memset` being substantially faster over a range of bytes. LLVM has the ability to lower loops to `memset` whet appropriate, but the odd nature of libc++'s loops prevented the optimization from taking places. This patch addresses the issue by rewriting the loops from the form `do [ ... --__n; } while (__n > 0);` to instead use a for loop over a pointer range (For example: `for (auto *__i = ...; __i < __e; ++__i)`). This patch also rewrites the asan annotations to unposion all additional memory at the start of the loop instead of once per iterations. This could potentially permit false negatives where the constructor of element N attempts to access element N + 1 during its construction. The before and after results for the `BM_ConstructSize/vector_byte/5140480_mean` benchmark (run 5 times) are: -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- Before ------ BM_ConstructSize/vector_byte/5140480_mean 12530140 ns 12469693 ns N/A BM_ConstructSize/vector_byte/5140480_median 12512818 ns 12445571 ns N/A BM_ConstructSize/vector_byte/5140480_stddev 106224 ns 107907 ns 5 ----- After ----- BM_ConstructSize/vector_byte/5140480_mean 167285 ns 166500 ns N/A BM_ConstructSize/vector_byte/5140480_median 166749 ns 166069 ns N/A BM_ConstructSize/vector_byte/5140480_stddev 3242 ns 3184 ns 5 llvm-svn: 367183
2019-07-28 12:37:02 +08:00
}
}
template <class Container>
void BM_ConstructSizeValue(benchmark::State& st, Container, typename Container::value_type const& val) {
const auto size = st.range(0);
for (auto _ : st) {
Container c(size, val);
DoNotOptimizeData(c);
Fix PR35637: suboptimal codegen for `vector<unsigned char>`. The optimizer is petulant and temperamental. In this case LLVM failed to lower the the "insert at end" loop used by`vector<unsigned char>` to a `memset` despite `memset` being substantially faster over a range of bytes. LLVM has the ability to lower loops to `memset` whet appropriate, but the odd nature of libc++'s loops prevented the optimization from taking places. This patch addresses the issue by rewriting the loops from the form `do [ ... --__n; } while (__n > 0);` to instead use a for loop over a pointer range (For example: `for (auto *__i = ...; __i < __e; ++__i)`). This patch also rewrites the asan annotations to unposion all additional memory at the start of the loop instead of once per iterations. This could potentially permit false negatives where the constructor of element N attempts to access element N + 1 during its construction. The before and after results for the `BM_ConstructSize/vector_byte/5140480_mean` benchmark (run 5 times) are: -------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------- Before ------ BM_ConstructSize/vector_byte/5140480_mean 12530140 ns 12469693 ns N/A BM_ConstructSize/vector_byte/5140480_median 12512818 ns 12445571 ns N/A BM_ConstructSize/vector_byte/5140480_stddev 106224 ns 107907 ns 5 ----- After ----- BM_ConstructSize/vector_byte/5140480_mean 167285 ns 166500 ns N/A BM_ConstructSize/vector_byte/5140480_median 166749 ns 166069 ns N/A BM_ConstructSize/vector_byte/5140480_stddev 3242 ns 3184 ns 5 llvm-svn: 367183
2019-07-28 12:37:02 +08:00
}
}
template <class Container, class GenInputs>
void BM_ConstructIterIter(benchmark::State& st, Container, GenInputs gen) {
auto in = gen(st.range(0));
const auto begin = in.begin();
const auto end = in.end();
benchmark::DoNotOptimize(&in);
while (st.KeepRunning()) {
Container c(begin, end);
DoNotOptimizeData(c);
}
}
template <class Container, class GenInputs>
void BM_InsertValue(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range(0));
const auto end = in.end();
while (st.KeepRunning()) {
c.clear();
for (auto it = in.begin(); it != end; ++it) {
benchmark::DoNotOptimize(&(*c.insert(*it).first));
}
benchmark::ClobberMemory();
}
}
template <class Container, class GenInputs>
void BM_InsertValueRehash(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range(0));
const auto end = in.end();
while (st.KeepRunning()) {
c.clear();
c.rehash(16);
for (auto it = in.begin(); it != end; ++it) {
benchmark::DoNotOptimize(&(*c.insert(*it).first));
}
benchmark::ClobberMemory();
}
}
template <class Container, class GenInputs>
void BM_InsertDuplicate(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range(0));
const auto end = in.end();
c.insert(in.begin(), in.end());
benchmark::DoNotOptimize(&c);
benchmark::DoNotOptimize(&in);
while (st.KeepRunning()) {
for (auto it = in.begin(); it != end; ++it) {
benchmark::DoNotOptimize(&(*c.insert(*it).first));
}
benchmark::ClobberMemory();
}
}
template <class Container, class GenInputs>
void BM_EmplaceDuplicate(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range(0));
const auto end = in.end();
c.insert(in.begin(), in.end());
benchmark::DoNotOptimize(&c);
benchmark::DoNotOptimize(&in);
while (st.KeepRunning()) {
for (auto it = in.begin(); it != end; ++it) {
benchmark::DoNotOptimize(&(*c.emplace(*it).first));
}
benchmark::ClobberMemory();
}
}
template <class Container, class GenInputs>
static void BM_Find(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range(0));
c.insert(in.begin(), in.end());
benchmark::DoNotOptimize(&(*c.begin()));
const auto end = in.data() + in.size();
while (st.KeepRunning()) {
for (auto it = in.data(); it != end; ++it) {
benchmark::DoNotOptimize(&(*c.find(*it)));
}
benchmark::ClobberMemory();
}
}
template <class Container, class GenInputs>
static void BM_FindRehash(benchmark::State& st, Container c, GenInputs gen) {
c.rehash(8);
auto in = gen(st.range(0));
c.insert(in.begin(), in.end());
benchmark::DoNotOptimize(&(*c.begin()));
const auto end = in.data() + in.size();
while (st.KeepRunning()) {
for (auto it = in.data(); it != end; ++it) {
benchmark::DoNotOptimize(&(*c.find(*it)));
}
benchmark::ClobberMemory();
}
}
template <class Container, class GenInputs>
static void BM_Rehash(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range(0));
c.max_load_factor(3.0);
c.insert(in.begin(), in.end());
benchmark::DoNotOptimize(c);
const auto bucket_count = c.bucket_count();
while (st.KeepRunning()) {
c.rehash(bucket_count + 1);
c.rehash(bucket_count);
benchmark::ClobberMemory();
}
}
} // end namespace ContainerBenchmarks
#endif // BENCHMARK_CONTAINER_BENCHMARKS_H