[libc++] Add introsort to avoid O(n^2) behavior

This commit adds a benchmark that tests std::sort on an adversarial inputs,
and uses introsort in std::sort to avoid O(n^2) behavior on adversarial
inputs.

Inputs where partitions are unbalanced even after 2 log(n) pivots have
been selected, the algorithm switches to heap sort to avoid the
possibility of spending O(n^2) time on sorting the input.
Benchmark results show that the intro sort implementation does
significantly better.

Benchmarking results before this change. Time represents the sorting
time required per element:

----------------------------------------------------------------------------------------------------------
Benchmark                                                                Time             CPU   Iterations
----------------------------------------------------------------------------------------------------------
BM_Sort_uint32_QuickSortAdversary_1                                   3.75 ns         3.74 ns    187432960
BM_Sort_uint32_QuickSortAdversary_4                                   3.05 ns         3.05 ns    231211008
BM_Sort_uint32_QuickSortAdversary_16                                  2.45 ns         2.45 ns    288096256
BM_Sort_uint32_QuickSortAdversary_64                                  32.8 ns         32.8 ns     21495808
BM_Sort_uint32_QuickSortAdversary_256                                  132 ns          132 ns      5505024
BM_Sort_uint32_QuickSortAdversary_1024                                 498 ns          497 ns      1572864
BM_Sort_uint32_QuickSortAdversary_16384                               3846 ns         3845 ns       262144
BM_Sort_uint32_QuickSortAdversary_262144                             61431 ns        61400 ns       262144
BM_Sort_uint64_QuickSortAdversary_1                                   3.93 ns         3.92 ns    181141504
BM_Sort_uint64_QuickSortAdversary_4                                   3.10 ns         3.09 ns    222560256
BM_Sort_uint64_QuickSortAdversary_16                                  2.50 ns         2.50 ns    283639808
BM_Sort_uint64_QuickSortAdversary_64                                  33.2 ns         33.2 ns     21757952
BM_Sort_uint64_QuickSortAdversary_256                                  132 ns          132 ns      5505024
BM_Sort_uint64_QuickSortAdversary_1024                                 478 ns          477 ns      1572864
BM_Sort_uint64_QuickSortAdversary_16384                               3932 ns         3930 ns       262144
BM_Sort_uint64_QuickSortAdversary_262144                             61646 ns        61615 ns       262144

Benchmarking results after this change:

----------------------------------------------------------------------------------------------------------
Benchmark                                                                Time             CPU   Iterations
----------------------------------------------------------------------------------------------------------
BM_Sort_uint32_QuickSortAdversary_1                                   6.31 ns         6.30 ns    107741184
BM_Sort_uint32_QuickSortAdversary_4                                   4.51 ns         4.50 ns    158859264
BM_Sort_uint32_QuickSortAdversary_16                                  3.00 ns         3.00 ns    223608832
BM_Sort_uint32_QuickSortAdversary_64                                  44.8 ns         44.8 ns     15990784
BM_Sort_uint32_QuickSortAdversary_256                                 69.0 ns         68.9 ns      9961472
BM_Sort_uint32_QuickSortAdversary_1024                                 118 ns          118 ns      6029312
BM_Sort_uint32_QuickSortAdversary_16384                                175 ns          175 ns      4194304
BM_Sort_uint32_QuickSortAdversary_262144                               210 ns          210 ns      3407872
BM_Sort_uint64_QuickSortAdversary_1                                   6.75 ns         6.73 ns    103809024
BM_Sort_uint64_QuickSortAdversary_4                                   4.53 ns         4.53 ns    160432128
BM_Sort_uint64_QuickSortAdversary_16                                  2.98 ns         2.97 ns    234356736
BM_Sort_uint64_QuickSortAdversary_64                                  44.3 ns         44.3 ns     15990784
BM_Sort_uint64_QuickSortAdversary_256                                 69.2 ns         69.2 ns     10223616
BM_Sort_uint64_QuickSortAdversary_1024                                 119 ns          119 ns      6029312
BM_Sort_uint64_QuickSortAdversary_16384                                173 ns          173 ns      4194304
BM_Sort_uint64_QuickSortAdversary_262144                               212 ns          212 ns      3407872

Differential Revision: https://reviews.llvm.org/D113413
This commit is contained in:
Nilay Vaish 2021-11-16 11:37:55 -05:00 committed by Louis Dionne
parent 4eda928660
commit 7f287390d7
3 changed files with 143 additions and 11 deletions
libcxx
benchmarks
include/__algorithm
test/std/algorithms/alg.sorting/alg.sort/sort

View File

@ -38,18 +38,65 @@ enum class Order {
Descending,
SingleElement,
PipeOrgan,
Heap
Heap,
QuickSortAdversary,
};
struct AllOrders : EnumValuesAsTuple<AllOrders, Order, 6> {
struct AllOrders : EnumValuesAsTuple<AllOrders, Order, 7> {
static constexpr const char* Names[] = {"Random", "Ascending",
"Descending", "SingleElement",
"PipeOrgan", "Heap"};
"PipeOrgan", "Heap",
"QuickSortAdversary"};
};
// fillAdversarialQuickSortInput fills the input vector with N int-like values.
// These values are arranged in such a way that they would invoke O(N^2)
// behavior on any quick sort implementation that satisifies certain conditions.
// Details are available in the following paper:
// "A Killer Adversary for Quicksort", M. D. McIlroy, Software—Practice &
// ExperienceVolume 29 Issue 4 April 10, 1999 pp 341344.
// https://dl.acm.org/doi/10.5555/311868.311871.
template <class T>
void fillAdversarialQuickSortInput(T& V, size_t N) {
assert(N > 0);
// If an element is equal to gas, it indicates that the value of the element
// is still to be decided and may change over the course of time.
const int gas = N - 1;
V.resize(N);
for (int i = 0; i < N; ++i) {
V[i] = gas;
}
// Candidate for the pivot position.
int candidate = 0;
int nsolid = 0;
// Populate all positions in the generated input to gas.
std::vector<int> ascVals(V.size());
// Fill up with ascending values from 0 to V.size()-1. These will act as
// indices into V.
std::iota(ascVals.begin(), ascVals.end(), 0);
std::sort(ascVals.begin(), ascVals.end(), [&](int x, int y) {
if (V[x] == gas && V[y] == gas) {
// We are comparing two inputs whose value is still to be decided.
if (x == candidate) {
V[x] = nsolid++;
} else {
V[y] = nsolid++;
}
}
if (V[x] == gas) {
candidate = x;
} else if (V[y] == gas) {
candidate = y;
}
return V[x] < V[y];
});
}
template <typename T>
void fillValues(std::vector<T>& V, size_t N, Order O) {
if (O == Order::SingleElement) {
V.resize(N, 0);
} else if (O == Order::QuickSortAdversary) {
fillAdversarialQuickSortInput(V, N);
} else {
while (V.size() < N)
V.push_back(V.size());
@ -128,6 +175,9 @@ void sortValues(T& V, Order O) {
case Order::Heap:
std::make_heap(V.begin(), V.end());
break;
case Order::QuickSortAdversary:
// Nothing to do
break;
}
}

View File

@ -263,12 +263,14 @@ __insertion_sort_move(_BidirectionalIterator __first1, _BidirectionalIterator __
template <class _Compare, class _RandomAccessIterator>
void
__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
__introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp,
typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type __depth)
{
typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
const difference_type __limit = is_trivially_copy_constructible<value_type>::value &&
is_trivially_copy_assignable<value_type>::value ? 30 : 6;
typedef typename __comp_ref_type<_Compare>::type _Comp_ref;
while (true)
{
__restart:
@ -298,6 +300,13 @@ __sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __c
return;
}
// __len > 5
if (__depth == 0)
{
// Fallback to heap sort as Introsort suggests.
_VSTD::__partial_sort<_Comp_ref>(__first, __last, __last, _Comp_ref(__comp));
return;
}
--__depth;
_RandomAccessIterator __m = __first;
_RandomAccessIterator __lm1 = __last;
--__lm1;
@ -440,19 +449,34 @@ __sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __c
// sort smaller range with recursive call and larger with tail recursion elimination
if (__i - __first < __last - __i)
{
_VSTD::__sort<_Compare>(__first, __i, __comp);
// _VSTD::__sort<_Compare>(__i+1, __last, __comp);
__first = ++__i;
_VSTD::__introsort<_Compare>(__first, __i, __comp, __depth);
__first = ++__i;
}
else
{
_VSTD::__sort<_Compare>(__i+1, __last, __comp);
// _VSTD::__sort<_Compare>(__first, __i, __comp);
__last = __i;
_VSTD::__introsort<_Compare>(__i + 1, __last, __comp, __depth);
__last = __i;
}
}
}
template <typename _Number>
inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) {
_Number __log2 = 0;
while (__n > 1) {
__log2++;
__n >>= 1;
}
return __log2;
}
template <class _Compare, class _RandomAccessIterator>
void __sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
difference_type __depth_limit = 2 * __log2i(__last - __first);
__introsort(__first, __last, __comp, __depth_limit);
}
template <class _Compare, class _Tp>
inline _LIBCPP_INLINE_VISIBILITY
void

View File

@ -147,6 +147,63 @@ test_pointer_sort()
assert(*pv[array_size - 1] == v[array_size - 1]);
}
// test_adversarial_quicksort generates a vector with values arranged in such a
// way that they would invoke O(N^2) behavior on any quick sort implementation
// that satisifies certain conditions. Details are available in the following
// paper:
// "A Killer Adversary for Quicksort", M. D. McIlroy, Software—Practice &
// ExperienceVolume 29 Issue 4 April 10, 1999 pp 341344.
// https://dl.acm.org/doi/10.5555/311868.311871.
struct AdversaryComparator {
AdversaryComparator(int N, std::vector<int>& input) : gas(N - 1), V(input) {
V.resize(N);
// Populate all positions in the generated input to gas to indicate that
// none of the values have been fixed yet.
for (int i = 0; i < N; ++i)
V[i] = gas;
}
bool operator()(int x, int y) {
if (V[x] == gas && V[y] == gas) {
// We are comparing two inputs whose value is still to be decided.
if (x == candidate) {
V[x] = nsolid++;
} else {
V[y] = nsolid++;
}
}
if (V[x] == gas) {
candidate = x;
} else if (V[y] == gas) {
candidate = y;
}
return V[x] < V[y];
}
private:
// If an element is equal to gas, it indicates that the value of the element
// is still to be decided and may change over the course of time.
const int gas;
// This is a reference so that we can manipulate the input vector later.
std::vector<int>& V;
// Candidate for the pivot position.
int candidate = 0;
int nsolid = 0;
};
void test_adversarial_quicksort(int N) {
assert(N > 0);
std::vector<int> ascVals(N);
// Fill up with ascending values from 0 to N-1. These will act as indices
// into V.
std::iota(ascVals.begin(), ascVals.end(), 0);
std::vector<int> V;
AdversaryComparator comp(N, V);
std::sort(ascVals.begin(), ascVals.end(), comp);
std::sort(V.begin(), V.end());
assert(std::is_sorted(V.begin(), V.end()));
}
int main(int, char**)
{
// test null range
@ -171,6 +228,7 @@ int main(int, char**)
test_larger_sorts(1009);
test_pointer_sort();
test_adversarial_quicksort(1 << 20);
return 0;
return 0;
}