llvm-project/libcxx/benchmarks/algorithms.bench.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

388 lines
11 KiB
C++
Raw Normal View History

#include <algorithm>
#include <cstdint>
#include <map>
#include <random>
#include <string>
#include <utility>
#include <vector>
#include "CartesianBenchmarks.h"
#include "GenerateInput.h"
#include "benchmark/benchmark.h"
#include "test_macros.h"
namespace {
// Element types the benchmarks are instantiated over. The enumerators are
// listed in the same order as the labels in AllValueTypes::Names.
enum class ValueType {
  Uint32,
  Uint64,
  Pair,
  Tuple,
  String,
};
// Tuple-of-enumerators description for ValueType (5 enumerators). Names holds
// one label per enumerator, in declaration order.
struct AllValueTypes : EnumValuesAsTuple<AllValueTypes, ValueType, 5> {
  static constexpr const char* Names[] = {
      "uint32",
      "uint64",
      "pair<uint32, uint32>",
      "tuple<uint32, uint64, uint32>",
      "string",
  };
};
// Maps a ValueType tag type V to the concrete element type benchmarked for it:
//   Uint32 -> uint32_t
//   Uint64 -> uint64_t
//   Pair   -> std::pair<uint32_t, uint32_t>
//   Tuple  -> std::tuple<uint32_t, uint64_t, uint32_t>
// Any other tag value resolves to std::string.
template <class V>
using Value = std::conditional_t<
    V() == ValueType::Uint32,
    uint32_t,
    std::conditional_t<
        V() == ValueType::Uint64,
        uint64_t,
        std::conditional_t<
            V() == ValueType::Pair,
            std::pair<uint32_t, uint32_t>,
            std::conditional_t<V() == ValueType::Tuple,
                               std::tuple<uint32_t, uint64_t, uint32_t>,
                               std::string>>>>;
// Initial arrangement of the elements handed to each benchmarked algorithm.
// The enumerators are listed in the same order as the labels in
// AllOrders::Names.
enum class Order {
  Random,             // shuffled with a randomly seeded engine
  Ascending,          // sorted ascending
  Descending,         // sorted descending
  SingleElement,      // every position holds the same value
  PipeOrgan,          // sorted ascending, then the second half reversed
  Heap,               // arranged by std::make_heap
  QuickSortAdversary, // built by fillAdversarialQuickSortInput (generic
                      // fillValues overload only)
};
[libc++] Add introsort to avoid O(n^2) behavior This commit adds a benchmark that tests std::sort on an adversarial inputs, and uses introsort in std::sort to avoid O(n^2) behavior on adversarial inputs. Inputs where partitions are unbalanced even after 2 log(n) pivots have been selected, the algorithm switches to heap sort to avoid the possibility of spending O(n^2) time on sorting the input. Benchmark results show that the intro sort implementation does significantly better. Benchmarking results before this change. Time represents the sorting time required per element: ---------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------------------------- BM_Sort_uint32_QuickSortAdversary_1 3.75 ns 3.74 ns 187432960 BM_Sort_uint32_QuickSortAdversary_4 3.05 ns 3.05 ns 231211008 BM_Sort_uint32_QuickSortAdversary_16 2.45 ns 2.45 ns 288096256 BM_Sort_uint32_QuickSortAdversary_64 32.8 ns 32.8 ns 21495808 BM_Sort_uint32_QuickSortAdversary_256 132 ns 132 ns 5505024 BM_Sort_uint32_QuickSortAdversary_1024 498 ns 497 ns 1572864 BM_Sort_uint32_QuickSortAdversary_16384 3846 ns 3845 ns 262144 BM_Sort_uint32_QuickSortAdversary_262144 61431 ns 61400 ns 262144 BM_Sort_uint64_QuickSortAdversary_1 3.93 ns 3.92 ns 181141504 BM_Sort_uint64_QuickSortAdversary_4 3.10 ns 3.09 ns 222560256 BM_Sort_uint64_QuickSortAdversary_16 2.50 ns 2.50 ns 283639808 BM_Sort_uint64_QuickSortAdversary_64 33.2 ns 33.2 ns 21757952 BM_Sort_uint64_QuickSortAdversary_256 132 ns 132 ns 5505024 BM_Sort_uint64_QuickSortAdversary_1024 478 ns 477 ns 1572864 BM_Sort_uint64_QuickSortAdversary_16384 3932 ns 3930 ns 262144 BM_Sort_uint64_QuickSortAdversary_262144 61646 ns 61615 ns 262144 Benchmarking results after this change: ---------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations 
---------------------------------------------------------------------------------------------------------- BM_Sort_uint32_QuickSortAdversary_1 6.31 ns 6.30 ns 107741184 BM_Sort_uint32_QuickSortAdversary_4 4.51 ns 4.50 ns 158859264 BM_Sort_uint32_QuickSortAdversary_16 3.00 ns 3.00 ns 223608832 BM_Sort_uint32_QuickSortAdversary_64 44.8 ns 44.8 ns 15990784 BM_Sort_uint32_QuickSortAdversary_256 69.0 ns 68.9 ns 9961472 BM_Sort_uint32_QuickSortAdversary_1024 118 ns 118 ns 6029312 BM_Sort_uint32_QuickSortAdversary_16384 175 ns 175 ns 4194304 BM_Sort_uint32_QuickSortAdversary_262144 210 ns 210 ns 3407872 BM_Sort_uint64_QuickSortAdversary_1 6.75 ns 6.73 ns 103809024 BM_Sort_uint64_QuickSortAdversary_4 4.53 ns 4.53 ns 160432128 BM_Sort_uint64_QuickSortAdversary_16 2.98 ns 2.97 ns 234356736 BM_Sort_uint64_QuickSortAdversary_64 44.3 ns 44.3 ns 15990784 BM_Sort_uint64_QuickSortAdversary_256 69.2 ns 69.2 ns 10223616 BM_Sort_uint64_QuickSortAdversary_1024 119 ns 119 ns 6029312 BM_Sort_uint64_QuickSortAdversary_16384 173 ns 173 ns 4194304 BM_Sort_uint64_QuickSortAdversary_262144 212 ns 212 ns 3407872 Differential Revision: https://reviews.llvm.org/D113413
2021-11-17 00:37:55 +08:00
// Tuple-of-enumerators description for Order (7 enumerators). Names holds one
// label per enumerator, in declaration order.
struct AllOrders : EnumValuesAsTuple<AllOrders, Order, 7> {
  static constexpr const char* Names[] = {
      "Random",
      "Ascending",
      "Descending",
      "SingleElement",
      "PipeOrgan",
      "Heap",
      "QuickSortAdversary",
  };
};
[libc++] Add introsort to avoid O(n^2) behavior This commit adds a benchmark that tests std::sort on an adversarial inputs, and uses introsort in std::sort to avoid O(n^2) behavior on adversarial inputs. Inputs where partitions are unbalanced even after 2 log(n) pivots have been selected, the algorithm switches to heap sort to avoid the possibility of spending O(n^2) time on sorting the input. Benchmark results show that the intro sort implementation does significantly better. Benchmarking results before this change. Time represents the sorting time required per element: ---------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------------------------- BM_Sort_uint32_QuickSortAdversary_1 3.75 ns 3.74 ns 187432960 BM_Sort_uint32_QuickSortAdversary_4 3.05 ns 3.05 ns 231211008 BM_Sort_uint32_QuickSortAdversary_16 2.45 ns 2.45 ns 288096256 BM_Sort_uint32_QuickSortAdversary_64 32.8 ns 32.8 ns 21495808 BM_Sort_uint32_QuickSortAdversary_256 132 ns 132 ns 5505024 BM_Sort_uint32_QuickSortAdversary_1024 498 ns 497 ns 1572864 BM_Sort_uint32_QuickSortAdversary_16384 3846 ns 3845 ns 262144 BM_Sort_uint32_QuickSortAdversary_262144 61431 ns 61400 ns 262144 BM_Sort_uint64_QuickSortAdversary_1 3.93 ns 3.92 ns 181141504 BM_Sort_uint64_QuickSortAdversary_4 3.10 ns 3.09 ns 222560256 BM_Sort_uint64_QuickSortAdversary_16 2.50 ns 2.50 ns 283639808 BM_Sort_uint64_QuickSortAdversary_64 33.2 ns 33.2 ns 21757952 BM_Sort_uint64_QuickSortAdversary_256 132 ns 132 ns 5505024 BM_Sort_uint64_QuickSortAdversary_1024 478 ns 477 ns 1572864 BM_Sort_uint64_QuickSortAdversary_16384 3932 ns 3930 ns 262144 BM_Sort_uint64_QuickSortAdversary_262144 61646 ns 61615 ns 262144 Benchmarking results after this change: ---------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations 
---------------------------------------------------------------------------------------------------------- BM_Sort_uint32_QuickSortAdversary_1 6.31 ns 6.30 ns 107741184 BM_Sort_uint32_QuickSortAdversary_4 4.51 ns 4.50 ns 158859264 BM_Sort_uint32_QuickSortAdversary_16 3.00 ns 3.00 ns 223608832 BM_Sort_uint32_QuickSortAdversary_64 44.8 ns 44.8 ns 15990784 BM_Sort_uint32_QuickSortAdversary_256 69.0 ns 68.9 ns 9961472 BM_Sort_uint32_QuickSortAdversary_1024 118 ns 118 ns 6029312 BM_Sort_uint32_QuickSortAdversary_16384 175 ns 175 ns 4194304 BM_Sort_uint32_QuickSortAdversary_262144 210 ns 210 ns 3407872 BM_Sort_uint64_QuickSortAdversary_1 6.75 ns 6.73 ns 103809024 BM_Sort_uint64_QuickSortAdversary_4 4.53 ns 4.53 ns 160432128 BM_Sort_uint64_QuickSortAdversary_16 2.98 ns 2.97 ns 234356736 BM_Sort_uint64_QuickSortAdversary_64 44.3 ns 44.3 ns 15990784 BM_Sort_uint64_QuickSortAdversary_256 69.2 ns 69.2 ns 10223616 BM_Sort_uint64_QuickSortAdversary_1024 119 ns 119 ns 6029312 BM_Sort_uint64_QuickSortAdversary_16384 173 ns 173 ns 4194304 BM_Sort_uint64_QuickSortAdversary_262144 212 ns 212 ns 3407872 Differential Revision: https://reviews.llvm.org/D113413
2021-11-17 00:37:55 +08:00
// fillAdversarialQuickSortInput fills the input vector with N int-like values.
// These values are arranged in such a way that they would invoke O(N^2)
// behavior on any quick sort implementation that satisfies certain conditions.
// Details are available in the following paper:
// "A Killer Adversary for Quicksort", M. D. McIlroy, Software—Practice &
// Experience, Volume 29, Issue 4, April 10, 1999, pp. 341–344.
// https://dl.acm.org/doi/10.5555/311868.311871.
//
// V is resized to exactly N elements (N must be > 0). The values are decided
// lazily while std::sort runs over a vector of indices into V, so the result
// depends on the comparison sequence of the std::sort implementation in use.
template <class T>
void fillAdversarialQuickSortInput(T& V, size_t N) {
  using Elem = typename T::value_type;
  assert(N > 0);
  // If an element is equal to gas, it indicates that the value of the element
  // is still to be decided and may change over the course of time.
  const Elem gas = static_cast<Elem>(N - 1);
  // Populate all positions in the generated input to gas.
  V.resize(N);
  std::fill(V.begin(), V.end(), gas);
  // Candidate for the pivot position.
  size_t candidate = 0;
  // Next concrete ("solid") value to hand out; solid values are assigned in
  // increasing order starting from 0.
  Elem nsolid = 0;
  // Fill up with ascending values from 0 to V.size()-1. These will act as
  // indices into V.
  std::vector<size_t> ascVals(V.size());
  std::iota(ascVals.begin(), ascVals.end(), size_t{0});
  std::sort(ascVals.begin(), ascVals.end(), [&](size_t x, size_t y) {
    if (V[x] == gas && V[y] == gas) {
      // We are comparing two inputs whose value is still to be decided.
      // Freeze the current pivot candidate if it is involved, otherwise y.
      if (x == candidate) {
        V[x] = nsolid++;
      } else {
        V[y] = nsolid++;
      }
    }
    // Whichever operand is still undecided becomes the next pivot candidate.
    if (V[x] == gas) {
      candidate = x;
    } else if (V[y] == gas) {
      candidate = y;
    }
    return V[x] < V[y];
  });
}
// Generic fill: grows V to N elements of an integer-constructible type T.
//   - SingleElement:      resizes to N, new elements are 0.
//   - QuickSortAdversary: delegates to fillAdversarialQuickSortInput.
//   - anything else:      appends the running index (0, 1, 2, ...) until V
//                         holds N elements; sortValues applies the final order.
template <typename T>
void fillValues(std::vector<T>& V, size_t N, Order O) {
  if (O == Order::SingleElement) {
    V.resize(N, 0);
  } else if (O == Order::QuickSortAdversary) {
    fillAdversarialQuickSortInput(V, N);
  } else {
    while (V.size() < N)
      V.push_back(static_cast<T>(V.size()));
  }
}
// Pair fill: grows V to N pairs. For SingleElement every new pair is (0, 0).
// Otherwise the pair appended at index i is (i, i) when i is odd and (0, i)
// when i is even, so every even-indexed pair shares the first element 0.
template <typename T>
void fillValues(std::vector<std::pair<T, T>>& V, size_t N, Order O) {
  if (O == Order::SingleElement) {
    V.resize(N, std::make_pair(0, 0));
    return;
  }
  for (size_t Idx = V.size(); Idx < N; Idx = V.size()) {
    const bool IsOdd = (Idx % 2) != 0;
    if (IsOdd)
      V.push_back(std::make_pair(Idx, Idx));
    else
      V.push_back(std::make_pair(0, Idx));
  }
}
// Tuple fill: grows V to N tuples. For SingleElement every new tuple is
// (0, 0, 0). Otherwise the tuple appended at index i depends on i % 3:
//   0 -> (i, i, i)
//   1 -> (0, i, i)   first element shared
//   2 -> (0, 0, i)   first and second elements shared
template <typename T1, typename T2, typename T3>
void fillValues(std::vector<std::tuple<T1, T2, T3>>& V, size_t N, Order O) {
  if (O == Order::SingleElement) {
    V.resize(N, std::make_tuple(0, 0, 0));
    return;
  }
  while (V.size() < N) {
    const size_t Idx = V.size();
    const size_t Phase = Idx % 3;
    if (Phase == 0)
      V.push_back(std::make_tuple(Idx, Idx, Idx));
    else if (Phase == 1)
      V.push_back(std::make_tuple(0, Idx, Idx));
    else
      V.push_back(std::make_tuple(0, 0, Idx));
  }
}
// String fill: grows V to N strings obtained from getRandomString(64). For
// SingleElement one string is generated and used for every new element;
// otherwise a fresh string is generated per appended element.
void fillValues(std::vector<std::string>& V, size_t N, Order O) {
  if (O == Order::SingleElement) {
    const auto Filler = getRandomString(64);
    V.resize(N, Filler);
    return;
  }
  for (size_t Have = V.size(); Have < N; ++Have)
    V.push_back(getRandomString(64));
}
// Rearranges the already-filled container V into the arrangement named by O.
//
// No assumption is made about V's order on entry: the pair, tuple, string and
// adversarial fills in this file do not produce sorted data, so the orders
// that need sorted input sort V themselves.
template <class T>
void sortValues(T& V, Order O) {
  switch (O) {
  case Order::Random: {
    // Seeded from std::random_device, so the permutation differs per run.
    std::random_device R;
    std::mt19937 M(R());
    std::shuffle(V.begin(), V.end(), M);
    break;
  }
  case Order::Ascending:
    std::sort(V.begin(), V.end());
    break;
  case Order::Descending:
    std::sort(V.begin(), V.end(), std::greater<>());
    break;
  case Order::SingleElement:
    // Nothing to do
    break;
  case Order::PipeOrgan:
    // Ascending first half followed by a descending second half.
    std::sort(V.begin(), V.end());
    std::reverse(V.begin() + V.size() / 2, V.end());
    break;
  case Order::Heap:
    std::make_heap(V.begin(), V.end());
    break;
  case Order::QuickSortAdversary:
    // Nothing to do
    break;
  }
}
// Target number of elements per generated test set: makeOrderedValues creates
// TestSetElements / N containers of N elements each (at least one). A smaller
// value is used when building with MemorySanitizer.
#if TEST_HAS_FEATURE(memory_sanitizer)
constexpr size_t TestSetElements = 1 << 14;
#else
constexpr size_t TestSetElements = 1 << 18;
#endif
// Builds the input set for one benchmark: max(1, TestSetElements / N)
// containers, each filled with N values by fillValues and then arranged by
// sortValues according to O. N must be non-zero.
template <class ValueType>
std::vector<std::vector<Value<ValueType>>> makeOrderedValues(size_t N,
                                                             Order O) {
  using Container = std::vector<Value<ValueType>>;
  const size_t Wanted = TestSetElements / N;
  std::vector<Container> Result(Wanted > 0 ? Wanted : size_t{1});
  for (Container& Values : Result) {
    fillValues(Values, N, O);
    sortValues(Values, O);
  }
  return Result;
}
// Overwrites every element of Copies with Orig while benchmark timing is
// paused. Not called anywhere in the visible portion of this file.
template <class T, class U>
TEST_ALWAYS_INLINE void resetCopies(benchmark::State& state, T& Copies,
                                    U& Orig) {
  state.PauseTiming();
  for (auto&& Slot : Copies) {
    Slot = Orig;
  }
  state.ResumeTiming();
}
// Selects how runOpOnCopies sizes the batch it reports to the benchmark
// library: CountElements counts every element in every container, CountBatch
// counts one per container.
enum class BatchSize { CountElements, CountBatch };
// Runs Body once per generated container on every benchmark batch.
//
//   state    - benchmark state driving the measurement loop.
//   Quantity - number of elements in each container.
//   O        - arrangement of the input data.
//   Count    - whether the batch size counts elements or containers.
//   Body     - callable invoked as Body(container&).
//
// After each batch the containers are restored from a pristine copy with
// timing paused, so Body always sees the original arrangement.
template <class ValueType, class F>
void runOpOnCopies(benchmark::State& state, size_t Quantity, Order O,
                   BatchSize Count, F Body) {
  const auto Pristine = makeOrderedValues<ValueType>(Quantity, O);
  auto Working = Pristine;

  size_t Batch = Working.size();
  if (Count == BatchSize::CountElements)
    Batch *= Quantity;

  while (state.KeepRunningBatch(Batch)) {
    for (auto& Values : Working) {
      Body(Values);
      benchmark::DoNotOptimize(Values);
    }
    state.PauseTiming();
    Working = Pristine;
    state.ResumeTiming();
  }
}
// Benchmark: std::sort over containers of Quantity elements arranged per Order.
template <class ValueType, class Order>
struct Sort {
  size_t Quantity;

  void run(benchmark::State& state) const {
    const auto SortAll = [](auto& Values) {
      std::sort(Values.begin(), Values.end());
    };
    runOpOnCopies<ValueType>(state, Quantity, Order(),
                             BatchSize::CountElements, SortAll);
  }

  // True for the Heap order, which this benchmark opts out of.
  bool skip() const { return Order() == ::Order::Heap; }

  // "BM_Sort" + value-type name + order name + "_" + Quantity.
  std::string name() const {
    const std::string Suffix = "_" + std::to_string(Quantity);
    return "BM_Sort" + ValueType::name() + Order::name() + Suffix;
  }
};
// Benchmark: std::stable_sort over containers of Quantity elements arranged
// per Order.
template <class ValueType, class Order>
struct StableSort {
  size_t Quantity;

  void run(benchmark::State& state) const {
    const auto StableSortAll = [](auto& Values) {
      std::stable_sort(Values.begin(), Values.end());
    };
    runOpOnCopies<ValueType>(state, Quantity, Order(),
                             BatchSize::CountElements, StableSortAll);
  }

  // True for the Heap order, which this benchmark opts out of.
  bool skip() const { return Order() == ::Order::Heap; }

  // "BM_StableSort" + value-type name + order name + "_" + Quantity.
  std::string name() const {
    const std::string Suffix = "_" + std::to_string(Quantity);
    return "BM_StableSort" + ValueType::name() + Order::name() + Suffix;
  }
};
// Benchmark: std::make_heap over containers of Quantity elements arranged per
// Order.
template <class ValueType, class Order>
struct MakeHeap {
  size_t Quantity;

  void run(benchmark::State& state) const {
    const auto Heapify = [](auto& Values) {
      std::make_heap(Values.begin(), Values.end());
    };
    runOpOnCopies<ValueType>(state, Quantity, Order(),
                             BatchSize::CountElements, Heapify);
  }

  // "BM_MakeHeap" + value-type name + order name + "_" + Quantity.
  std::string name() const {
    const std::string Suffix = "_" + std::to_string(Quantity);
    return "BM_MakeHeap" + ValueType::name() + Order::name() + Suffix;
  }
};
// Benchmark: std::sort_heap over containers of Quantity elements. The input is
// always generated with Order::Heap, so each container is a heap on entry.
template <class ValueType>
struct SortHeap {
  size_t Quantity;

  void run(benchmark::State& state) const {
    const auto SortTheHeap = [](auto& Values) {
      std::sort_heap(Values.begin(), Values.end());
    };
    runOpOnCopies<ValueType>(state, Quantity, Order::Heap,
                             BatchSize::CountElements, SortTheHeap);
  }

  // "BM_SortHeap" + value-type name + "_" + Quantity.
  std::string name() const {
    const std::string Suffix = "_" + std::to_string(Quantity);
    return "BM_SortHeap" + ValueType::name() + Suffix;
  }
};
// Benchmark: std::make_heap followed by std::sort_heap (a full heap sort) over
// containers of Quantity elements arranged per Order.
template <class ValueType, class Order>
struct MakeThenSortHeap {
  size_t Quantity;

  void run(benchmark::State& state) const {
    const auto HeapSort = [](auto& Values) {
      const auto First = Values.begin();
      const auto Last = Values.end();
      std::make_heap(First, Last);
      std::sort_heap(First, Last);
    };
    runOpOnCopies<ValueType>(state, Quantity, Order(),
                             BatchSize::CountElements, HeapSort);
  }

  // "BM_MakeThenSortHeap" + value-type name + order name + "_" + Quantity.
  std::string name() const {
    const std::string Suffix = "_" + std::to_string(Quantity);
    return "BM_MakeThenSortHeap" + ValueType::name() + Order::name() + Suffix;
  }
};
// Benchmark: builds a heap incrementally by calling std::push_heap on each
// growing prefix of containers of Quantity elements arranged per Order.
template <class ValueType, class Order>
struct PushHeap {
  size_t Quantity;

  void run(benchmark::State& state) const {
    const auto PushEach = [](auto& Values) {
      const auto First = Values.begin();
      const auto End = Values.end();
      // [First, Cur) is a heap on entry to each iteration; push *Cur onto it.
      for (auto Cur = First; Cur != End; ++Cur)
        std::push_heap(First, Cur + 1);
    };
    runOpOnCopies<ValueType>(state, Quantity, Order(),
                             BatchSize::CountElements, PushEach);
  }

  // True for the Heap order, which this benchmark opts out of.
  bool skip() const { return Order() == ::Order::Heap; }

  // "BM_PushHeap" + value-type name + order name + "_" + Quantity.
  std::string name() const {
    const std::string Suffix = "_" + std::to_string(Quantity);
    return "BM_PushHeap" + ValueType::name() + Order::name() + Suffix;
  }
};
template <class ValueType>
struct PopHeap {
size_t Quantity;
void run(benchmark::State& state) const {
runOpOnCopies<ValueType>(
state, Quantity, Order(), BatchSize::CountElements, [](auto& Copy) {
for (auto B = Copy.begin(), I = Copy.end(); I != B; --I) {
std::pop_heap(B, I);
}
});
}
std::string name() const {
return "BM_PopHeap" + ValueType::name() + "_" + std::to_string(Quantity);
};
};
} // namespace
// Entry point: parses benchmark command-line flags, registers every benchmark
// family for each container size in Quantities, then runs the selected ones.
// Returns 1 if unrecognized arguments were reported, 0 otherwise.
int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv);
  const bool HasUnknownArgs = benchmark::ReportUnrecognizedArguments(argc, argv);
  if (HasUnknownArgs)
    return 1;

  const std::vector<size_t> Quantities = {
      1 << 0,
      1 << 2,
      1 << 4,
      1 << 6,
      1 << 8,
      1 << 10,
      1 << 14,
  // Running each benchmark in parallel consumes too much memory with MSAN
  // and can lead to the test process being killed.
#if !TEST_HAS_FEATURE(memory_sanitizer)
      1 << 18
#endif
  };

  // Families parameterized over value type and input order.
  makeCartesianProductBenchmark<Sort, AllValueTypes, AllOrders>(Quantities);
  makeCartesianProductBenchmark<StableSort, AllValueTypes, AllOrders>(
      Quantities);
  makeCartesianProductBenchmark<MakeHeap, AllValueTypes, AllOrders>(Quantities);
  // SortHeap and PopHeap are parameterized over value type only.
  makeCartesianProductBenchmark<SortHeap, AllValueTypes>(Quantities);
  makeCartesianProductBenchmark<MakeThenSortHeap, AllValueTypes, AllOrders>(
      Quantities);
  makeCartesianProductBenchmark<PushHeap, AllValueTypes, AllOrders>(Quantities);
  makeCartesianProductBenchmark<PopHeap, AllValueTypes>(Quantities);

  benchmark::RunSpecifiedBenchmarks();
  return 0;
}