llvm-project/libcxx/include/__algorithm/sort.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

556 lines
21 KiB
C
Raw Normal View History

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___ALGORITHM_SORT_H
#define _LIBCPP___ALGORITHM_SORT_H
#include <__config>
#include <__algorithm/comp.h>
#include <__algorithm/comp_ref_type.h>
#include <__algorithm/min_element.h>
#include <__algorithm/partial_sort.h>
#include <__algorithm/unwrap_iter.h>
#include <__utility/swap.h>
#include <memory>
#if defined(_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY)
# include <__algorithm/shuffle.h>
#endif
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
_LIBCPP_BEGIN_NAMESPACE_STD
// stable, 2-3 compares, 0-2 swaps
template <class _Compare, class _ForwardIterator>
_LIBCPP_CONSTEXPR_AFTER_CXX11 unsigned
__sort3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z, _Compare __c)
{
unsigned __r = 0;
if (!__c(*__y, *__x)) // if x <= y
{
if (!__c(*__z, *__y)) // if y <= z
return __r; // x <= y && y <= z
// x <= y && y > z
swap(*__y, *__z); // x <= z && y < z
__r = 1;
if (__c(*__y, *__x)) // if x > y
{
swap(*__x, *__y); // x < y && y <= z
__r = 2;
}
return __r; // x <= y && y < z
}
if (__c(*__z, *__y)) // x > y, if y > z
{
swap(*__x, *__z); // x < y && y < z
__r = 1;
return __r;
}
swap(*__x, *__y); // x > y && y <= z
__r = 1; // x < y && x <= z
if (__c(*__z, *__y)) // if y > z
{
swap(*__y, *__z); // x <= y && y < z
__r = 2;
}
return __r;
} // x <= y && y <= z
// stable, 3-6 compares, 0-5 swaps
template <class _Compare, class _ForwardIterator>
unsigned
__sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3,
_ForwardIterator __x4, _Compare __c)
{
unsigned __r = _VSTD::__sort3<_Compare>(__x1, __x2, __x3, __c);
if (__c(*__x4, *__x3))
{
swap(*__x3, *__x4);
++__r;
if (__c(*__x3, *__x2))
{
swap(*__x2, *__x3);
++__r;
if (__c(*__x2, *__x1))
{
swap(*__x1, *__x2);
++__r;
}
}
}
return __r;
}
// stable, 4-10 compares, 0-9 swaps
template <class _Compare, class _ForwardIterator>
_LIBCPP_HIDDEN
unsigned
__sort5(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3,
_ForwardIterator __x4, _ForwardIterator __x5, _Compare __c)
{
unsigned __r = _VSTD::__sort4<_Compare>(__x1, __x2, __x3, __x4, __c);
if (__c(*__x5, *__x4))
{
swap(*__x4, *__x5);
++__r;
if (__c(*__x4, *__x3))
{
swap(*__x3, *__x4);
++__r;
if (__c(*__x3, *__x2))
{
swap(*__x2, *__x3);
++__r;
if (__c(*__x2, *__x1))
{
swap(*__x1, *__x2);
++__r;
}
}
}
}
return __r;
}
// Assumes size > 0
template <class _Compare, class _BidirectionalIterator>
_LIBCPP_CONSTEXPR_AFTER_CXX11 void
__selection_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, _Compare __comp)
{
_BidirectionalIterator __lm1 = __last;
for (--__lm1; __first != __lm1; ++__first)
{
_BidirectionalIterator __i = _VSTD::min_element<_BidirectionalIterator, _Compare&>(__first, __last, __comp);
if (__i != __first)
swap(*__first, *__i);
}
}
template <class _Compare, class _BidirectionalIterator>
void
__insertion_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, _Compare __comp)
{
typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type;
if (__first != __last)
{
_BidirectionalIterator __i = __first;
for (++__i; __i != __last; ++__i)
{
_BidirectionalIterator __j = __i;
value_type __t(_VSTD::move(*__j));
for (_BidirectionalIterator __k = __i; __k != __first && __comp(__t, *--__k); --__j)
*__j = _VSTD::move(*__k);
*__j = _VSTD::move(__t);
}
}
}
template <class _Compare, class _RandomAccessIterator>
void
__insertion_sort_3(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
{
typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
_RandomAccessIterator __j = __first+2;
_VSTD::__sort3<_Compare>(__first, __first+1, __j, __comp);
for (_RandomAccessIterator __i = __j+1; __i != __last; ++__i)
{
if (__comp(*__i, *__j))
{
value_type __t(_VSTD::move(*__i));
_RandomAccessIterator __k = __j;
__j = __i;
do
{
*__j = _VSTD::move(*__k);
__j = __k;
} while (__j != __first && __comp(__t, *--__k));
*__j = _VSTD::move(__t);
}
__j = __i;
}
}
template <class _Compare, class _RandomAccessIterator>
bool
__insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
{
switch (__last - __first)
{
case 0:
case 1:
return true;
case 2:
if (__comp(*--__last, *__first))
swap(*__first, *__last);
return true;
case 3:
_VSTD::__sort3<_Compare>(__first, __first+1, --__last, __comp);
return true;
case 4:
_VSTD::__sort4<_Compare>(__first, __first+1, __first+2, --__last, __comp);
return true;
case 5:
_VSTD::__sort5<_Compare>(__first, __first+1, __first+2, __first+3, --__last, __comp);
return true;
}
typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
_RandomAccessIterator __j = __first+2;
_VSTD::__sort3<_Compare>(__first, __first+1, __j, __comp);
const unsigned __limit = 8;
unsigned __count = 0;
for (_RandomAccessIterator __i = __j+1; __i != __last; ++__i)
{
if (__comp(*__i, *__j))
{
value_type __t(_VSTD::move(*__i));
_RandomAccessIterator __k = __j;
__j = __i;
do
{
*__j = _VSTD::move(*__k);
__j = __k;
} while (__j != __first && __comp(__t, *--__k));
*__j = _VSTD::move(__t);
if (++__count == __limit)
return ++__i == __last;
}
__j = __i;
}
return true;
}
template <class _Compare, class _BidirectionalIterator>
void
__insertion_sort_move(_BidirectionalIterator __first1, _BidirectionalIterator __last1,
typename iterator_traits<_BidirectionalIterator>::value_type* __first2, _Compare __comp)
{
typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type;
if (__first1 != __last1)
{
__destruct_n __d(0);
unique_ptr<value_type, __destruct_n&> __h(__first2, __d);
value_type* __last2 = __first2;
::new ((void*)__last2) value_type(_VSTD::move(*__first1));
__d.template __incr<value_type>();
for (++__last2; ++__first1 != __last1; ++__last2)
{
value_type* __j2 = __last2;
value_type* __i2 = __j2;
if (__comp(*__first1, *--__i2))
{
::new ((void*)__j2) value_type(_VSTD::move(*__i2));
__d.template __incr<value_type>();
for (--__j2; __i2 != __first2 && __comp(*__first1, *--__i2); --__j2)
*__j2 = _VSTD::move(*__i2);
*__j2 = _VSTD::move(*__first1);
}
else
{
::new ((void*)__j2) value_type(_VSTD::move(*__first1));
__d.template __incr<value_type>();
}
}
__h.release();
}
}
template <class _Compare, class _RandomAccessIterator>
void
[libc++] Add introsort to avoid O(n^2) behavior This commit adds a benchmark that tests std::sort on an adversarial inputs, and uses introsort in std::sort to avoid O(n^2) behavior on adversarial inputs. Inputs where partitions are unbalanced even after 2 log(n) pivots have been selected, the algorithm switches to heap sort to avoid the possibility of spending O(n^2) time on sorting the input. Benchmark results show that the intro sort implementation does significantly better. Benchmarking results before this change. Time represents the sorting time required per element: ---------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------------------------- BM_Sort_uint32_QuickSortAdversary_1 3.75 ns 3.74 ns 187432960 BM_Sort_uint32_QuickSortAdversary_4 3.05 ns 3.05 ns 231211008 BM_Sort_uint32_QuickSortAdversary_16 2.45 ns 2.45 ns 288096256 BM_Sort_uint32_QuickSortAdversary_64 32.8 ns 32.8 ns 21495808 BM_Sort_uint32_QuickSortAdversary_256 132 ns 132 ns 5505024 BM_Sort_uint32_QuickSortAdversary_1024 498 ns 497 ns 1572864 BM_Sort_uint32_QuickSortAdversary_16384 3846 ns 3845 ns 262144 BM_Sort_uint32_QuickSortAdversary_262144 61431 ns 61400 ns 262144 BM_Sort_uint64_QuickSortAdversary_1 3.93 ns 3.92 ns 181141504 BM_Sort_uint64_QuickSortAdversary_4 3.10 ns 3.09 ns 222560256 BM_Sort_uint64_QuickSortAdversary_16 2.50 ns 2.50 ns 283639808 BM_Sort_uint64_QuickSortAdversary_64 33.2 ns 33.2 ns 21757952 BM_Sort_uint64_QuickSortAdversary_256 132 ns 132 ns 5505024 BM_Sort_uint64_QuickSortAdversary_1024 478 ns 477 ns 1572864 BM_Sort_uint64_QuickSortAdversary_16384 3932 ns 3930 ns 262144 BM_Sort_uint64_QuickSortAdversary_262144 61646 ns 61615 ns 262144 Benchmarking results after this change: ---------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------------------------- BM_Sort_uint32_QuickSortAdversary_1 6.31 ns 6.30 ns 107741184 BM_Sort_uint32_QuickSortAdversary_4 4.51 ns 4.50 ns 158859264 BM_Sort_uint32_QuickSortAdversary_16 3.00 ns 3.00 ns 223608832 BM_Sort_uint32_QuickSortAdversary_64 44.8 ns 44.8 ns 15990784 BM_Sort_uint32_QuickSortAdversary_256 69.0 ns 68.9 ns 9961472 BM_Sort_uint32_QuickSortAdversary_1024 118 ns 118 ns 6029312 BM_Sort_uint32_QuickSortAdversary_16384 175 ns 175 ns 4194304 BM_Sort_uint32_QuickSortAdversary_262144 210 ns 210 ns 3407872 BM_Sort_uint64_QuickSortAdversary_1 6.75 ns 6.73 ns 103809024 BM_Sort_uint64_QuickSortAdversary_4 4.53 ns 4.53 ns 160432128 BM_Sort_uint64_QuickSortAdversary_16 2.98 ns 2.97 ns 234356736 BM_Sort_uint64_QuickSortAdversary_64 44.3 ns 44.3 ns 15990784 BM_Sort_uint64_QuickSortAdversary_256 69.2 ns 69.2 ns 10223616 BM_Sort_uint64_QuickSortAdversary_1024 119 ns 119 ns 6029312 BM_Sort_uint64_QuickSortAdversary_16384 173 ns 173 ns 4194304 BM_Sort_uint64_QuickSortAdversary_262144 212 ns 212 ns 3407872 Differential Revision: https://reviews.llvm.org/D113413
2021-11-17 00:37:55 +08:00
__introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp,
typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type __depth)
{
typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
const difference_type __limit = is_trivially_copy_constructible<value_type>::value &&
is_trivially_copy_assignable<value_type>::value ? 30 : 6;
[libc++] Add introsort to avoid O(n^2) behavior This commit adds a benchmark that tests std::sort on an adversarial inputs, and uses introsort in std::sort to avoid O(n^2) behavior on adversarial inputs. Inputs where partitions are unbalanced even after 2 log(n) pivots have been selected, the algorithm switches to heap sort to avoid the possibility of spending O(n^2) time on sorting the input. Benchmark results show that the intro sort implementation does significantly better. Benchmarking results before this change. Time represents the sorting time required per element: ---------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------------------------- BM_Sort_uint32_QuickSortAdversary_1 3.75 ns 3.74 ns 187432960 BM_Sort_uint32_QuickSortAdversary_4 3.05 ns 3.05 ns 231211008 BM_Sort_uint32_QuickSortAdversary_16 2.45 ns 2.45 ns 288096256 BM_Sort_uint32_QuickSortAdversary_64 32.8 ns 32.8 ns 21495808 BM_Sort_uint32_QuickSortAdversary_256 132 ns 132 ns 5505024 BM_Sort_uint32_QuickSortAdversary_1024 498 ns 497 ns 1572864 BM_Sort_uint32_QuickSortAdversary_16384 3846 ns 3845 ns 262144 BM_Sort_uint32_QuickSortAdversary_262144 61431 ns 61400 ns 262144 BM_Sort_uint64_QuickSortAdversary_1 3.93 ns 3.92 ns 181141504 BM_Sort_uint64_QuickSortAdversary_4 3.10 ns 3.09 ns 222560256 BM_Sort_uint64_QuickSortAdversary_16 2.50 ns 2.50 ns 283639808 BM_Sort_uint64_QuickSortAdversary_64 33.2 ns 33.2 ns 21757952 BM_Sort_uint64_QuickSortAdversary_256 132 ns 132 ns 5505024 BM_Sort_uint64_QuickSortAdversary_1024 478 ns 477 ns 1572864 BM_Sort_uint64_QuickSortAdversary_16384 3932 ns 3930 ns 262144 BM_Sort_uint64_QuickSortAdversary_262144 61646 ns 61615 ns 262144 Benchmarking results after this change: ---------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------------------------- BM_Sort_uint32_QuickSortAdversary_1 6.31 ns 6.30 ns 107741184 BM_Sort_uint32_QuickSortAdversary_4 4.51 ns 4.50 ns 158859264 BM_Sort_uint32_QuickSortAdversary_16 3.00 ns 3.00 ns 223608832 BM_Sort_uint32_QuickSortAdversary_64 44.8 ns 44.8 ns 15990784 BM_Sort_uint32_QuickSortAdversary_256 69.0 ns 68.9 ns 9961472 BM_Sort_uint32_QuickSortAdversary_1024 118 ns 118 ns 6029312 BM_Sort_uint32_QuickSortAdversary_16384 175 ns 175 ns 4194304 BM_Sort_uint32_QuickSortAdversary_262144 210 ns 210 ns 3407872 BM_Sort_uint64_QuickSortAdversary_1 6.75 ns 6.73 ns 103809024 BM_Sort_uint64_QuickSortAdversary_4 4.53 ns 4.53 ns 160432128 BM_Sort_uint64_QuickSortAdversary_16 2.98 ns 2.97 ns 234356736 BM_Sort_uint64_QuickSortAdversary_64 44.3 ns 44.3 ns 15990784 BM_Sort_uint64_QuickSortAdversary_256 69.2 ns 69.2 ns 10223616 BM_Sort_uint64_QuickSortAdversary_1024 119 ns 119 ns 6029312 BM_Sort_uint64_QuickSortAdversary_16384 173 ns 173 ns 4194304 BM_Sort_uint64_QuickSortAdversary_262144 212 ns 212 ns 3407872 Differential Revision: https://reviews.llvm.org/D113413
2021-11-17 00:37:55 +08:00
typedef typename __comp_ref_type<_Compare>::type _Comp_ref;
while (true)
{
__restart:
difference_type __len = __last - __first;
switch (__len)
{
case 0:
case 1:
return;
case 2:
if (__comp(*--__last, *__first))
swap(*__first, *__last);
return;
case 3:
_VSTD::__sort3<_Compare>(__first, __first+1, --__last, __comp);
return;
case 4:
_VSTD::__sort4<_Compare>(__first, __first+1, __first+2, --__last, __comp);
return;
case 5:
_VSTD::__sort5<_Compare>(__first, __first+1, __first+2, __first+3, --__last, __comp);
return;
}
if (__len <= __limit)
{
_VSTD::__insertion_sort_3<_Compare>(__first, __last, __comp);
return;
}
// __len > 5
[libc++] Add introsort to avoid O(n^2) behavior This commit adds a benchmark that tests std::sort on an adversarial inputs, and uses introsort in std::sort to avoid O(n^2) behavior on adversarial inputs. Inputs where partitions are unbalanced even after 2 log(n) pivots have been selected, the algorithm switches to heap sort to avoid the possibility of spending O(n^2) time on sorting the input. Benchmark results show that the intro sort implementation does significantly better. Benchmarking results before this change. Time represents the sorting time required per element: ---------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------------------------- BM_Sort_uint32_QuickSortAdversary_1 3.75 ns 3.74 ns 187432960 BM_Sort_uint32_QuickSortAdversary_4 3.05 ns 3.05 ns 231211008 BM_Sort_uint32_QuickSortAdversary_16 2.45 ns 2.45 ns 288096256 BM_Sort_uint32_QuickSortAdversary_64 32.8 ns 32.8 ns 21495808 BM_Sort_uint32_QuickSortAdversary_256 132 ns 132 ns 5505024 BM_Sort_uint32_QuickSortAdversary_1024 498 ns 497 ns 1572864 BM_Sort_uint32_QuickSortAdversary_16384 3846 ns 3845 ns 262144 BM_Sort_uint32_QuickSortAdversary_262144 61431 ns 61400 ns 262144 BM_Sort_uint64_QuickSortAdversary_1 3.93 ns 3.92 ns 181141504 BM_Sort_uint64_QuickSortAdversary_4 3.10 ns 3.09 ns 222560256 BM_Sort_uint64_QuickSortAdversary_16 2.50 ns 2.50 ns 283639808 BM_Sort_uint64_QuickSortAdversary_64 33.2 ns 33.2 ns 21757952 BM_Sort_uint64_QuickSortAdversary_256 132 ns 132 ns 5505024 BM_Sort_uint64_QuickSortAdversary_1024 478 ns 477 ns 1572864 BM_Sort_uint64_QuickSortAdversary_16384 3932 ns 3930 ns 262144 BM_Sort_uint64_QuickSortAdversary_262144 61646 ns 61615 ns 262144 Benchmarking results after this change: ---------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------------------------- BM_Sort_uint32_QuickSortAdversary_1 6.31 ns 6.30 ns 107741184 BM_Sort_uint32_QuickSortAdversary_4 4.51 ns 4.50 ns 158859264 BM_Sort_uint32_QuickSortAdversary_16 3.00 ns 3.00 ns 223608832 BM_Sort_uint32_QuickSortAdversary_64 44.8 ns 44.8 ns 15990784 BM_Sort_uint32_QuickSortAdversary_256 69.0 ns 68.9 ns 9961472 BM_Sort_uint32_QuickSortAdversary_1024 118 ns 118 ns 6029312 BM_Sort_uint32_QuickSortAdversary_16384 175 ns 175 ns 4194304 BM_Sort_uint32_QuickSortAdversary_262144 210 ns 210 ns 3407872 BM_Sort_uint64_QuickSortAdversary_1 6.75 ns 6.73 ns 103809024 BM_Sort_uint64_QuickSortAdversary_4 4.53 ns 4.53 ns 160432128 BM_Sort_uint64_QuickSortAdversary_16 2.98 ns 2.97 ns 234356736 BM_Sort_uint64_QuickSortAdversary_64 44.3 ns 44.3 ns 15990784 BM_Sort_uint64_QuickSortAdversary_256 69.2 ns 69.2 ns 10223616 BM_Sort_uint64_QuickSortAdversary_1024 119 ns 119 ns 6029312 BM_Sort_uint64_QuickSortAdversary_16384 173 ns 173 ns 4194304 BM_Sort_uint64_QuickSortAdversary_262144 212 ns 212 ns 3407872 Differential Revision: https://reviews.llvm.org/D113413
2021-11-17 00:37:55 +08:00
if (__depth == 0)
{
// Fallback to heap sort as Introsort suggests.
_VSTD::__partial_sort<_Comp_ref>(__first, __last, __last, _Comp_ref(__comp));
return;
}
--__depth;
_RandomAccessIterator __m = __first;
_RandomAccessIterator __lm1 = __last;
--__lm1;
unsigned __n_swaps;
{
difference_type __delta;
if (__len >= 1000)
{
__delta = __len/2;
__m += __delta;
__delta /= 2;
__n_swaps = _VSTD::__sort5<_Compare>(__first, __first + __delta, __m, __m+__delta, __lm1, __comp);
}
else
{
__delta = __len/2;
__m += __delta;
__n_swaps = _VSTD::__sort3<_Compare>(__first, __m, __lm1, __comp);
}
}
// *__m is median
// partition [__first, __m) < *__m and *__m <= [__m, __last)
// (this inhibits tossing elements equivalent to __m around unnecessarily)
_RandomAccessIterator __i = __first;
_RandomAccessIterator __j = __lm1;
// j points beyond range to be tested, *__m is known to be <= *__lm1
// The search going up is known to be guarded but the search coming down isn't.
// Prime the downward search with a guard.
if (!__comp(*__i, *__m)) // if *__first == *__m
{
// *__first == *__m, *__first doesn't go in first part
// manually guard downward moving __j against __i
while (true)
{
if (__i == --__j)
{
// *__first == *__m, *__m <= all other elements
// Parition instead into [__first, __i) == *__first and *__first < [__i, __last)
++__i; // __first + 1
__j = __last;
if (!__comp(*__first, *--__j)) // we need a guard if *__first == *(__last-1)
{
while (true)
{
if (__i == __j)
return; // [__first, __last) all equivalent elements
if (__comp(*__first, *__i))
{
swap(*__i, *__j);
++__n_swaps;
++__i;
break;
}
++__i;
}
}
// [__first, __i) == *__first and *__first < [__j, __last) and __j == __last - 1
if (__i == __j)
return;
while (true)
{
while (!__comp(*__first, *__i))
++__i;
while (__comp(*__first, *--__j))
;
if (__i >= __j)
break;
swap(*__i, *__j);
++__n_swaps;
++__i;
}
// [__first, __i) == *__first and *__first < [__i, __last)
// The first part is sorted, sort the second part
// _VSTD::__sort<_Compare>(__i, __last, __comp);
__first = __i;
goto __restart;
}
if (__comp(*__j, *__m))
{
swap(*__i, *__j);
++__n_swaps;
break; // found guard for downward moving __j, now use unguarded partition
}
}
}
// It is known that *__i < *__m
++__i;
// j points beyond range to be tested, *__m is known to be <= *__lm1
// if not yet partitioned...
if (__i < __j)
{
// known that *(__i - 1) < *__m
// known that __i <= __m
while (true)
{
// __m still guards upward moving __i
while (__comp(*__i, *__m))
++__i;
// It is now known that a guard exists for downward moving __j
while (!__comp(*--__j, *__m))
;
if (__i > __j)
break;
swap(*__i, *__j);
++__n_swaps;
// It is known that __m != __j
// If __m just moved, follow it
if (__m == __i)
__m = __j;
++__i;
}
}
// [__first, __i) < *__m and *__m <= [__i, __last)
if (__i != __m && __comp(*__m, *__i))
{
swap(*__i, *__m);
++__n_swaps;
}
// [__first, __i) < *__i and *__i <= [__i+1, __last)
// If we were given a perfect partition, see if insertion sort is quick...
if (__n_swaps == 0)
{
bool __fs = _VSTD::__insertion_sort_incomplete<_Compare>(__first, __i, __comp);
if (_VSTD::__insertion_sort_incomplete<_Compare>(__i+1, __last, __comp))
{
if (__fs)
return;
__last = __i;
continue;
}
else
{
if (__fs)
{
__first = ++__i;
continue;
}
}
}
// sort smaller range with recursive call and larger with tail recursion elimination
if (__i - __first < __last - __i)
{
[libc++] Add introsort to avoid O(n^2) behavior This commit adds a benchmark that tests std::sort on an adversarial inputs, and uses introsort in std::sort to avoid O(n^2) behavior on adversarial inputs. Inputs where partitions are unbalanced even after 2 log(n) pivots have been selected, the algorithm switches to heap sort to avoid the possibility of spending O(n^2) time on sorting the input. Benchmark results show that the intro sort implementation does significantly better. Benchmarking results before this change. Time represents the sorting time required per element: ---------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------------------------- BM_Sort_uint32_QuickSortAdversary_1 3.75 ns 3.74 ns 187432960 BM_Sort_uint32_QuickSortAdversary_4 3.05 ns 3.05 ns 231211008 BM_Sort_uint32_QuickSortAdversary_16 2.45 ns 2.45 ns 288096256 BM_Sort_uint32_QuickSortAdversary_64 32.8 ns 32.8 ns 21495808 BM_Sort_uint32_QuickSortAdversary_256 132 ns 132 ns 5505024 BM_Sort_uint32_QuickSortAdversary_1024 498 ns 497 ns 1572864 BM_Sort_uint32_QuickSortAdversary_16384 3846 ns 3845 ns 262144 BM_Sort_uint32_QuickSortAdversary_262144 61431 ns 61400 ns 262144 BM_Sort_uint64_QuickSortAdversary_1 3.93 ns 3.92 ns 181141504 BM_Sort_uint64_QuickSortAdversary_4 3.10 ns 3.09 ns 222560256 BM_Sort_uint64_QuickSortAdversary_16 2.50 ns 2.50 ns 283639808 BM_Sort_uint64_QuickSortAdversary_64 33.2 ns 33.2 ns 21757952 BM_Sort_uint64_QuickSortAdversary_256 132 ns 132 ns 5505024 BM_Sort_uint64_QuickSortAdversary_1024 478 ns 477 ns 1572864 BM_Sort_uint64_QuickSortAdversary_16384 3932 ns 3930 ns 262144 BM_Sort_uint64_QuickSortAdversary_262144 61646 ns 61615 ns 262144 Benchmarking results after this change: ---------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------------------------- BM_Sort_uint32_QuickSortAdversary_1 6.31 ns 6.30 ns 107741184 BM_Sort_uint32_QuickSortAdversary_4 4.51 ns 4.50 ns 158859264 BM_Sort_uint32_QuickSortAdversary_16 3.00 ns 3.00 ns 223608832 BM_Sort_uint32_QuickSortAdversary_64 44.8 ns 44.8 ns 15990784 BM_Sort_uint32_QuickSortAdversary_256 69.0 ns 68.9 ns 9961472 BM_Sort_uint32_QuickSortAdversary_1024 118 ns 118 ns 6029312 BM_Sort_uint32_QuickSortAdversary_16384 175 ns 175 ns 4194304 BM_Sort_uint32_QuickSortAdversary_262144 210 ns 210 ns 3407872 BM_Sort_uint64_QuickSortAdversary_1 6.75 ns 6.73 ns 103809024 BM_Sort_uint64_QuickSortAdversary_4 4.53 ns 4.53 ns 160432128 BM_Sort_uint64_QuickSortAdversary_16 2.98 ns 2.97 ns 234356736 BM_Sort_uint64_QuickSortAdversary_64 44.3 ns 44.3 ns 15990784 BM_Sort_uint64_QuickSortAdversary_256 69.2 ns 69.2 ns 10223616 BM_Sort_uint64_QuickSortAdversary_1024 119 ns 119 ns 6029312 BM_Sort_uint64_QuickSortAdversary_16384 173 ns 173 ns 4194304 BM_Sort_uint64_QuickSortAdversary_262144 212 ns 212 ns 3407872 Differential Revision: https://reviews.llvm.org/D113413
2021-11-17 00:37:55 +08:00
_VSTD::__introsort<_Compare>(__first, __i, __comp, __depth);
__first = ++__i;
}
else
{
[libc++] Add introsort to avoid O(n^2) behavior This commit adds a benchmark that tests std::sort on an adversarial inputs, and uses introsort in std::sort to avoid O(n^2) behavior on adversarial inputs. Inputs where partitions are unbalanced even after 2 log(n) pivots have been selected, the algorithm switches to heap sort to avoid the possibility of spending O(n^2) time on sorting the input. Benchmark results show that the intro sort implementation does significantly better. Benchmarking results before this change. Time represents the sorting time required per element: ---------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------------------------- BM_Sort_uint32_QuickSortAdversary_1 3.75 ns 3.74 ns 187432960 BM_Sort_uint32_QuickSortAdversary_4 3.05 ns 3.05 ns 231211008 BM_Sort_uint32_QuickSortAdversary_16 2.45 ns 2.45 ns 288096256 BM_Sort_uint32_QuickSortAdversary_64 32.8 ns 32.8 ns 21495808 BM_Sort_uint32_QuickSortAdversary_256 132 ns 132 ns 5505024 BM_Sort_uint32_QuickSortAdversary_1024 498 ns 497 ns 1572864 BM_Sort_uint32_QuickSortAdversary_16384 3846 ns 3845 ns 262144 BM_Sort_uint32_QuickSortAdversary_262144 61431 ns 61400 ns 262144 BM_Sort_uint64_QuickSortAdversary_1 3.93 ns 3.92 ns 181141504 BM_Sort_uint64_QuickSortAdversary_4 3.10 ns 3.09 ns 222560256 BM_Sort_uint64_QuickSortAdversary_16 2.50 ns 2.50 ns 283639808 BM_Sort_uint64_QuickSortAdversary_64 33.2 ns 33.2 ns 21757952 BM_Sort_uint64_QuickSortAdversary_256 132 ns 132 ns 5505024 BM_Sort_uint64_QuickSortAdversary_1024 478 ns 477 ns 1572864 BM_Sort_uint64_QuickSortAdversary_16384 3932 ns 3930 ns 262144 BM_Sort_uint64_QuickSortAdversary_262144 61646 ns 61615 ns 262144 Benchmarking results after this change: ---------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------------------------- BM_Sort_uint32_QuickSortAdversary_1 6.31 ns 6.30 ns 107741184 BM_Sort_uint32_QuickSortAdversary_4 4.51 ns 4.50 ns 158859264 BM_Sort_uint32_QuickSortAdversary_16 3.00 ns 3.00 ns 223608832 BM_Sort_uint32_QuickSortAdversary_64 44.8 ns 44.8 ns 15990784 BM_Sort_uint32_QuickSortAdversary_256 69.0 ns 68.9 ns 9961472 BM_Sort_uint32_QuickSortAdversary_1024 118 ns 118 ns 6029312 BM_Sort_uint32_QuickSortAdversary_16384 175 ns 175 ns 4194304 BM_Sort_uint32_QuickSortAdversary_262144 210 ns 210 ns 3407872 BM_Sort_uint64_QuickSortAdversary_1 6.75 ns 6.73 ns 103809024 BM_Sort_uint64_QuickSortAdversary_4 4.53 ns 4.53 ns 160432128 BM_Sort_uint64_QuickSortAdversary_16 2.98 ns 2.97 ns 234356736 BM_Sort_uint64_QuickSortAdversary_64 44.3 ns 44.3 ns 15990784 BM_Sort_uint64_QuickSortAdversary_256 69.2 ns 69.2 ns 10223616 BM_Sort_uint64_QuickSortAdversary_1024 119 ns 119 ns 6029312 BM_Sort_uint64_QuickSortAdversary_16384 173 ns 173 ns 4194304 BM_Sort_uint64_QuickSortAdversary_262144 212 ns 212 ns 3407872 Differential Revision: https://reviews.llvm.org/D113413
2021-11-17 00:37:55 +08:00
_VSTD::__introsort<_Compare>(__i + 1, __last, __comp, __depth);
__last = __i;
}
}
}
[libc++] Add introsort to avoid O(n^2) behavior This commit adds a benchmark that tests std::sort on an adversarial inputs, and uses introsort in std::sort to avoid O(n^2) behavior on adversarial inputs. Inputs where partitions are unbalanced even after 2 log(n) pivots have been selected, the algorithm switches to heap sort to avoid the possibility of spending O(n^2) time on sorting the input. Benchmark results show that the intro sort implementation does significantly better. Benchmarking results before this change. Time represents the sorting time required per element: ---------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------------------------- BM_Sort_uint32_QuickSortAdversary_1 3.75 ns 3.74 ns 187432960 BM_Sort_uint32_QuickSortAdversary_4 3.05 ns 3.05 ns 231211008 BM_Sort_uint32_QuickSortAdversary_16 2.45 ns 2.45 ns 288096256 BM_Sort_uint32_QuickSortAdversary_64 32.8 ns 32.8 ns 21495808 BM_Sort_uint32_QuickSortAdversary_256 132 ns 132 ns 5505024 BM_Sort_uint32_QuickSortAdversary_1024 498 ns 497 ns 1572864 BM_Sort_uint32_QuickSortAdversary_16384 3846 ns 3845 ns 262144 BM_Sort_uint32_QuickSortAdversary_262144 61431 ns 61400 ns 262144 BM_Sort_uint64_QuickSortAdversary_1 3.93 ns 3.92 ns 181141504 BM_Sort_uint64_QuickSortAdversary_4 3.10 ns 3.09 ns 222560256 BM_Sort_uint64_QuickSortAdversary_16 2.50 ns 2.50 ns 283639808 BM_Sort_uint64_QuickSortAdversary_64 33.2 ns 33.2 ns 21757952 BM_Sort_uint64_QuickSortAdversary_256 132 ns 132 ns 5505024 BM_Sort_uint64_QuickSortAdversary_1024 478 ns 477 ns 1572864 BM_Sort_uint64_QuickSortAdversary_16384 3932 ns 3930 ns 262144 BM_Sort_uint64_QuickSortAdversary_262144 61646 ns 61615 ns 262144 Benchmarking results after this change: ---------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------------------------- BM_Sort_uint32_QuickSortAdversary_1 6.31 ns 6.30 ns 107741184 BM_Sort_uint32_QuickSortAdversary_4 4.51 ns 4.50 ns 158859264 BM_Sort_uint32_QuickSortAdversary_16 3.00 ns 3.00 ns 223608832 BM_Sort_uint32_QuickSortAdversary_64 44.8 ns 44.8 ns 15990784 BM_Sort_uint32_QuickSortAdversary_256 69.0 ns 68.9 ns 9961472 BM_Sort_uint32_QuickSortAdversary_1024 118 ns 118 ns 6029312 BM_Sort_uint32_QuickSortAdversary_16384 175 ns 175 ns 4194304 BM_Sort_uint32_QuickSortAdversary_262144 210 ns 210 ns 3407872 BM_Sort_uint64_QuickSortAdversary_1 6.75 ns 6.73 ns 103809024 BM_Sort_uint64_QuickSortAdversary_4 4.53 ns 4.53 ns 160432128 BM_Sort_uint64_QuickSortAdversary_16 2.98 ns 2.97 ns 234356736 BM_Sort_uint64_QuickSortAdversary_64 44.3 ns 44.3 ns 15990784 BM_Sort_uint64_QuickSortAdversary_256 69.2 ns 69.2 ns 10223616 BM_Sort_uint64_QuickSortAdversary_1024 119 ns 119 ns 6029312 BM_Sort_uint64_QuickSortAdversary_16384 173 ns 173 ns 4194304 BM_Sort_uint64_QuickSortAdversary_262144 212 ns 212 ns 3407872 Differential Revision: https://reviews.llvm.org/D113413
2021-11-17 00:37:55 +08:00
template <typename _Number>
inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) {
_Number __log2 = 0;
while (__n > 1) {
__log2++;
__n >>= 1;
}
return __log2;
}
template <class _Compare, class _RandomAccessIterator>
void __sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
difference_type __depth_limit = 2 * __log2i(__last - __first);
__introsort(__first, __last, __comp, __depth_limit);
}
template <class _Compare, class _Tp>
inline _LIBCPP_INLINE_VISIBILITY
void
__sort(_Tp** __first, _Tp** __last, __less<_Tp*>&)
{
__less<uintptr_t> __comp;
_VSTD::__sort<__less<uintptr_t>&, uintptr_t*>((uintptr_t*)__first, (uintptr_t*)__last, __comp);
}
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<char>&, char*>(char*, char*, __less<char>&))
#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<wchar_t>&, wchar_t*>(wchar_t*, wchar_t*, __less<wchar_t>&))
#endif
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<signed char>&, signed char*>(signed char*, signed char*, __less<signed char>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<unsigned char>&, unsigned char*>(unsigned char*, unsigned char*, __less<unsigned char>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<short>&, short*>(short*, short*, __less<short>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<unsigned short>&, unsigned short*>(unsigned short*, unsigned short*, __less<unsigned short>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<int>&, int*>(int*, int*, __less<int>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<unsigned>&, unsigned*>(unsigned*, unsigned*, __less<unsigned>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<long>&, long*>(long*, long*, __less<long>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<unsigned long>&, unsigned long*>(unsigned long*, unsigned long*, __less<unsigned long>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<long long>&, long long*>(long long*, long long*, __less<long long>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<unsigned long long>&, unsigned long long*>(unsigned long long*, unsigned long long*, __less<unsigned long long>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<float>&, float*>(float*, float*, __less<float>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<double>&, double*>(double*, double*, __less<double>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<long double>&, long double*>(long double*, long double*, __less<long double>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<char>&, char*>(char*, char*, __less<char>&))
#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<wchar_t>&, wchar_t*>(wchar_t*, wchar_t*, __less<wchar_t>&))
#endif
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<signed char>&, signed char*>(signed char*, signed char*, __less<signed char>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<unsigned char>&, unsigned char*>(unsigned char*, unsigned char*, __less<unsigned char>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<short>&, short*>(short*, short*, __less<short>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<unsigned short>&, unsigned short*>(unsigned short*, unsigned short*, __less<unsigned short>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<int>&, int*>(int*, int*, __less<int>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<unsigned>&, unsigned*>(unsigned*, unsigned*, __less<unsigned>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<long>&, long*>(long*, long*, __less<long>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<unsigned long>&, unsigned long*>(unsigned long*, unsigned long*, __less<unsigned long>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<long long>&, long long*>(long long*, long long*, __less<long long>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<unsigned long long>&, unsigned long long*>(unsigned long long*, unsigned long long*, __less<unsigned long long>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<float>&, float*>(float*, float*, __less<float>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<double>&, double*>(double*, double*, __less<double>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<long double>&, long double*>(long double*, long double*, __less<long double>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS unsigned __sort5<__less<long double>&, long double*>(long double*, long double*, long double*, long double*, long double*, __less<long double>&))
template <class _RandomAccessIterator, class _Compare>
inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
void
sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
{
_LIBCPP_DEBUG_RANDOMIZE_RANGE(__first, __last);
typedef typename __comp_ref_type<_Compare>::type _Comp_ref;
if (__libcpp_is_constant_evaluated()) {
_VSTD::__partial_sort<_Comp_ref>(__first, __last, __last, _Comp_ref(__comp));
} else {
_VSTD::__sort<_Comp_ref>(_VSTD::__unwrap_iter(__first), _VSTD::__unwrap_iter(__last), _Comp_ref(__comp));
}
}
template <class _RandomAccessIterator>
inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
void
sort(_RandomAccessIterator __first, _RandomAccessIterator __last)
{
_VSTD::sort(__first, __last, __less<typename iterator_traits<_RandomAccessIterator>::value_type>());
}
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___ALGORITHM_SORT_H