forked from OSchip/llvm-project
[libc++] Improve charconv base10 algorithm.
This change is a preparation to add the 128-bit integral output. Before ``` -------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------- BM_to_chars_good/2 20.1 ns 20.1 ns 35045000 BM_to_chars_good/3 117 ns 117 ns 5916000 BM_to_chars_good/4 83.7 ns 83.7 ns 8401000 BM_to_chars_good/5 70.6 ns 70.6 ns 9915000 BM_to_chars_good/6 59.9 ns 59.9 ns 11678000 BM_to_chars_good/7 53.9 ns 53.8 ns 12995000 BM_to_chars_good/8 19.0 ns 19.0 ns 37110000 BM_to_chars_good/9 45.9 ns 45.8 ns 15278000 BM_to_chars_good/10 9.24 ns 9.24 ns 75343000 BM_to_chars_good/11 42.6 ns 42.6 ns 16449000 BM_to_chars_good/12 38.8 ns 38.8 ns 18101000 BM_to_chars_good/13 38.8 ns 38.8 ns 17999000 BM_to_chars_good/14 37.7 ns 37.6 ns 18571000 BM_to_chars_good/15 35.8 ns 35.8 ns 19660000 BM_to_chars_good/16 15.4 ns 15.4 ns 46129000 BM_to_chars_good/17 32.3 ns 32.3 ns 21763000 BM_to_chars_good/18 32.8 ns 32.8 ns 21396000 BM_to_chars_good/19 33.4 ns 33.4 ns 21078000 BM_to_chars_good/20 33.3 ns 33.3 ns 21020000 BM_to_chars_good/21 32.3 ns 32.3 ns 21807000 BM_to_chars_good/22 31.6 ns 31.6 ns 22057000 BM_to_chars_good/23 30.7 ns 30.7 ns 22938000 BM_to_chars_good/24 28.3 ns 28.3 ns 24659000 BM_to_chars_good/25 28.2 ns 28.2 ns 24790000 BM_to_chars_good/26 28.4 ns 28.4 ns 24410000 BM_to_chars_good/27 28.7 ns 28.7 ns 24423000 BM_to_chars_good/28 28.9 ns 28.9 ns 24139000 BM_to_chars_good/29 28.9 ns 28.9 ns 24347000 BM_to_chars_good/30 29.2 ns 29.2 ns 24141000 BM_to_chars_good/31 29.6 ns 29.6 ns 23699000 BM_to_chars_good/32 29.5 ns 29.5 ns 23933000 BM_to_chars_good/33 28.9 ns 28.9 ns 24042000 BM_to_chars_good/34 28.7 ns 28.7 ns 24361000 BM_to_chars_good/35 28.3 ns 28.3 ns 24703000 BM_to_chars_good/36 28.1 ns 28.1 ns 24924000 BM_to_chars_bad/2 6.16 ns 6.15 ns 114101000 BM_to_chars_bad/3 14.5 ns 14.5 ns 48244000 BM_to_chars_bad/4 16.9 ns 16.9 ns 41974000 BM_to_chars_bad/5 12.5 ns 12.5 ns 56080000 BM_to_chars_bad/6 10.9 ns 10.9 
ns 64036000 BM_to_chars_bad/7 14.5 ns 14.5 ns 47294000 BM_to_chars_bad/8 6.36 ns 6.35 ns 110430000 BM_to_chars_bad/9 12.4 ns 12.4 ns 56448000 BM_to_chars_bad/10 5.13 ns 5.13 ns 137596000 BM_to_chars_bad/11 9.88 ns 9.88 ns 69015000 BM_to_chars_bad/12 10.8 ns 10.8 ns 63990000 BM_to_chars_bad/13 10.7 ns 10.7 ns 65066000 BM_to_chars_bad/14 9.71 ns 9.71 ns 71775000 BM_to_chars_bad/15 9.18 ns 9.18 ns 75267000 BM_to_chars_bad/16 6.12 ns 6.12 ns 115000000 BM_to_chars_bad/17 10.7 ns 10.7 ns 65504000 BM_to_chars_bad/18 10.6 ns 10.6 ns 65685000 BM_to_chars_bad/19 9.98 ns 9.98 ns 69894000 BM_to_chars_bad/20 9.74 ns 9.74 ns 72098000 BM_to_chars_bad/21 9.25 ns 9.25 ns 75184000 BM_to_chars_bad/22 9.10 ns 9.10 ns 75602000 BM_to_chars_bad/23 9.48 ns 9.48 ns 72824000 BM_to_chars_bad/24 9.27 ns 9.27 ns 75112000 BM_to_chars_bad/25 9.61 ns 9.61 ns 72080000 BM_to_chars_bad/26 9.72 ns 9.72 ns 72178000 BM_to_chars_bad/27 10.0 ns 10.0 ns 69733000 BM_to_chars_bad/28 10.3 ns 10.3 ns 67409000 BM_to_chars_bad/29 9.97 ns 9.97 ns 69193000 BM_to_chars_bad/30 10.1 ns 10.1 ns 69007000 BM_to_chars_bad/31 9.68 ns 9.68 ns 72232000 BM_to_chars_bad/32 8.99 ns 8.99 ns 76825000 BM_to_chars_bad/33 8.82 ns 8.82 ns 79293000 BM_to_chars_bad/34 8.64 ns 8.64 ns 80441000 BM_to_chars_bad/35 8.96 ns 8.96 ns 75320000 BM_to_chars_bad/36 8.87 ns 8.87 ns 77293000 ``` After ``` -------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------- BM_to_chars_good/2 14.7 ns 14.7 ns 47583000 BM_to_chars_good/3 101 ns 101 ns 6901000 BM_to_chars_good/4 68.4 ns 68.4 ns 10088000 BM_to_chars_good/5 58.2 ns 58.2 ns 12007000 BM_to_chars_good/6 51.1 ns 51.1 ns 13687000 BM_to_chars_good/7 45.6 ns 45.6 ns 15323000 BM_to_chars_good/8 14.6 ns 14.6 ns 47795000 BM_to_chars_good/9 40.7 ns 40.7 ns 17371000 BM_to_chars_good/10 7.48 ns 7.48 ns 90931000 BM_to_chars_good/11 37.6 ns 37.6 ns 18542000 BM_to_chars_good/12 35.2 ns 35.2 ns 19922000 
BM_to_chars_good/13 34.9 ns 34.9 ns 20105000 BM_to_chars_good/14 33.5 ns 33.5 ns 20863000 BM_to_chars_good/15 31.9 ns 31.9 ns 22014000 BM_to_chars_good/16 11.7 ns 11.7 ns 60012000 BM_to_chars_good/17 28.9 ns 28.9 ns 24148000 BM_to_chars_good/18 29.0 ns 29.0 ns 24317000 BM_to_chars_good/19 28.7 ns 28.7 ns 24363000 BM_to_chars_good/20 28.1 ns 28.1 ns 24899000 BM_to_chars_good/21 27.5 ns 27.5 ns 25499000 BM_to_chars_good/22 26.9 ns 26.9 ns 25929000 BM_to_chars_good/23 26.2 ns 26.2 ns 26828000 BM_to_chars_good/24 25.1 ns 25.1 ns 27742000 BM_to_chars_good/25 25.3 ns 25.3 ns 27720000 BM_to_chars_good/26 25.2 ns 25.2 ns 27789000 BM_to_chars_good/27 25.3 ns 25.3 ns 27777000 BM_to_chars_good/28 25.3 ns 25.3 ns 27643000 BM_to_chars_good/29 25.3 ns 25.3 ns 27750000 BM_to_chars_good/30 25.4 ns 25.4 ns 27566000 BM_to_chars_good/31 25.4 ns 25.4 ns 27611000 BM_to_chars_good/32 25.8 ns 25.8 ns 27218000 BM_to_chars_good/33 25.7 ns 25.7 ns 27070000 BM_to_chars_good/34 26.1 ns 26.1 ns 26693000 BM_to_chars_good/35 26.4 ns 26.4 ns 26486000 BM_to_chars_good/36 26.3 ns 26.3 ns 26619000 BM_to_chars_bad/2 5.99 ns 5.99 ns 118787000 BM_to_chars_bad/3 14.3 ns 14.3 ns 48567000 BM_to_chars_bad/4 16.0 ns 16.0 ns 43239000 BM_to_chars_bad/5 12.6 ns 12.6 ns 55354000 BM_to_chars_bad/6 10.7 ns 10.7 ns 65491000 BM_to_chars_bad/7 14.4 ns 14.4 ns 48723000 BM_to_chars_bad/8 6.50 ns 6.50 ns 104967000 BM_to_chars_bad/9 12.0 ns 12.0 ns 56552000 BM_to_chars_bad/10 5.16 ns 5.16 ns 136380000 BM_to_chars_bad/11 10.5 ns 10.5 ns 66764000 BM_to_chars_bad/12 10.7 ns 10.7 ns 65534000 BM_to_chars_bad/13 11.0 ns 11.0 ns 63426000 BM_to_chars_bad/14 9.90 ns 9.90 ns 68575000 BM_to_chars_bad/15 9.52 ns 9.52 ns 70932000 BM_to_chars_bad/16 6.14 ns 6.14 ns 111762000 BM_to_chars_bad/17 10.6 ns 10.6 ns 65883000 BM_to_chars_bad/18 10.5 ns 10.5 ns 67606000 BM_to_chars_bad/19 9.96 ns 9.96 ns 68898000 BM_to_chars_bad/20 9.40 ns 9.41 ns 73116000 BM_to_chars_bad/21 9.12 ns 9.12 ns 78647000 BM_to_chars_bad/22 8.95 ns 8.95 ns 80211000 
BM_to_chars_bad/23 9.50 ns 9.49 ns 73571000 BM_to_chars_bad/24 9.29 ns 9.29 ns 74690000 BM_to_chars_bad/25 9.65 ns 9.65 ns 72877000 BM_to_chars_bad/26 9.78 ns 9.78 ns 70171000 BM_to_chars_bad/27 10.1 ns 10.1 ns 69543000 BM_to_chars_bad/28 10.4 ns 10.4 ns 67582000 BM_to_chars_bad/29 10.00 ns 10.00 ns 70806000 BM_to_chars_bad/30 9.99 ns 9.99 ns 70340000 BM_to_chars_bad/31 9.56 ns 9.56 ns 74159000 BM_to_chars_bad/32 8.97 ns 8.97 ns 78052000 BM_to_chars_bad/33 8.86 ns 8.86 ns 78586000 BM_to_chars_bad/34 8.81 ns 8.81 ns 78562000 BM_to_chars_bad/35 8.90 ns 8.90 ns 77384000 BM_to_chars_bad/36 9.04 ns 9.04 ns 77263000 ``` Reviewed By: #libc, ldionne Differential Revision: https://reviews.llvm.org/D127764
This commit is contained in:
parent
5517bc6c4a
commit
3561ee586e
|
@ -10,10 +10,10 @@
|
|||
#ifndef _LIBCPP___CHARCONV_TO_CHARS_BASE_10_H
|
||||
#define _LIBCPP___CHARCONV_TO_CHARS_BASE_10_H
|
||||
|
||||
#include <__algorithm/copy_n.h>
|
||||
#include <__charconv/tables.h>
|
||||
#include <__config>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
|
||||
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
|
||||
# pragma GCC system_header
|
||||
|
@ -25,98 +25,97 @@ _LIBCPP_BEGIN_NAMESPACE_STD
|
|||
|
||||
namespace __itoa {
|
||||
|
||||
template <class _Tp>
|
||||
_LIBCPP_HIDE_FROM_ABI char* __append1(char* __buffer, _Tp __value) noexcept {
|
||||
*__buffer = '0' + static_cast<char>(__value);
|
||||
return __buffer + 1;
|
||||
_LIBCPP_HIDE_FROM_ABI inline char* __append1(char* __first, uint32_t __value) noexcept {
|
||||
*__first = '0' + static_cast<char>(__value);
|
||||
return __first + 1;
|
||||
}
|
||||
|
||||
template <class _Tp>
|
||||
_LIBCPP_HIDE_FROM_ABI char* __append2(char* __buffer, _Tp __value) noexcept {
|
||||
std::memcpy(__buffer, &__table<>::__digits_base_10[(__value)*2], 2);
|
||||
return __buffer + 2;
|
||||
_LIBCPP_HIDE_FROM_ABI inline char* __append2(char* __first, uint32_t __value) noexcept {
|
||||
return std::copy_n(&__table<>::__digits_base_10[__value * 2], 2, __first);
|
||||
}
|
||||
|
||||
template <class _Tp>
|
||||
_LIBCPP_HIDE_FROM_ABI char* __append3(char* __buffer, _Tp __value) noexcept {
|
||||
return __itoa::__append2(__itoa::__append1(__buffer, (__value) / 100), (__value) % 100);
|
||||
_LIBCPP_HIDE_FROM_ABI inline char* __append3(char* __first, uint32_t __value) noexcept {
|
||||
return __itoa::__append2(__itoa::__append1(__first, __value / 100), __value % 100);
|
||||
}
|
||||
|
||||
template <class _Tp>
|
||||
_LIBCPP_HIDE_FROM_ABI char* __append4(char* __buffer, _Tp __value) noexcept {
|
||||
return __itoa::__append2(__itoa::__append2(__buffer, (__value) / 100), (__value) % 100);
|
||||
_LIBCPP_HIDE_FROM_ABI inline char* __append4(char* __first, uint32_t __value) noexcept {
|
||||
return __itoa::__append2(__itoa::__append2(__first, __value / 100), __value % 100);
|
||||
}
|
||||
|
||||
template <class _Tp>
|
||||
_LIBCPP_HIDE_FROM_ABI char* __append2_no_zeros(char* __buffer, _Tp __value) noexcept {
|
||||
if (__value < 10)
|
||||
return __itoa::__append1(__buffer, __value);
|
||||
else
|
||||
return __itoa::__append2(__buffer, __value);
|
||||
_LIBCPP_HIDE_FROM_ABI inline char* __append5(char* __first, uint32_t __value) noexcept {
|
||||
return __itoa::__append4(__itoa::__append1(__first, __value / 10000), __value % 10000);
|
||||
}
|
||||
|
||||
template <class _Tp>
|
||||
_LIBCPP_HIDE_FROM_ABI char* __append4_no_zeros(char* __buffer, _Tp __value) noexcept {
|
||||
if (__value < 100)
|
||||
return __itoa::__append2_no_zeros(__buffer, __value);
|
||||
else if (__value < 1000)
|
||||
return __itoa::__append3(__buffer, __value);
|
||||
else
|
||||
return __itoa::__append4(__buffer, __value);
|
||||
_LIBCPP_HIDE_FROM_ABI inline char* __append6(char* __first, uint32_t __value) noexcept {
|
||||
return __itoa::__append4(__itoa::__append2(__first, __value / 10000), __value % 10000);
|
||||
}
|
||||
|
||||
template <class _Tp>
|
||||
_LIBCPP_HIDE_FROM_ABI char* __append8_no_zeros(char* __buffer, _Tp __value) noexcept {
|
||||
if (__value < 10000)
|
||||
__buffer = __itoa::__append4_no_zeros(__buffer, __value);
|
||||
else {
|
||||
__buffer = __itoa::__append4_no_zeros(__buffer, __value / 10000);
|
||||
__buffer = __itoa::__append4(__buffer, __value % 10000);
|
||||
}
|
||||
return __buffer;
|
||||
_LIBCPP_HIDE_FROM_ABI inline char* __append7(char* __first, uint32_t __value) noexcept {
|
||||
return __itoa::__append6(__itoa::__append1(__first, __value / 1000000), __value % 1000000);
|
||||
}
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u32(uint32_t __value, char* __buffer) noexcept {
|
||||
if (__value < 100000000)
|
||||
__buffer = __itoa::__append8_no_zeros(__buffer, __value);
|
||||
else {
|
||||
// __value = aabbbbcccc in decimal
|
||||
const uint32_t __a = __value / 100000000; // 1 to 42
|
||||
__value %= 100000000;
|
||||
_LIBCPP_HIDE_FROM_ABI inline char* __append8(char* __first, uint32_t __value) noexcept {
|
||||
return __itoa::__append6(__itoa::__append2(__first, __value / 1000000), __value % 1000000);
|
||||
}
|
||||
|
||||
__buffer = __itoa::__append2_no_zeros(__buffer, __a);
|
||||
__buffer = __itoa::__append4(__buffer, __value / 10000);
|
||||
__buffer = __itoa::__append4(__buffer, __value % 10000);
|
||||
_LIBCPP_HIDE_FROM_ABI inline char* __append9(char* __first, uint32_t __value) noexcept {
|
||||
return __itoa::__append8(__itoa::__append1(__first, __value / 100000000), __value % 100000000);
|
||||
}
|
||||
|
||||
// Appends exactly ten decimal digits: the two digits of __value / 100000000
// (via __append2) followed by the eight digits of __value % 100000000 (via
// __append8). Returns the pointer returned by __append8.
//
// This function is used for uint32_t and uint64_t; both halves are narrowed
// to uint32_t before being passed on.
// NOTE(review): presumably callers guarantee __value < 10'000'000'000 so that
// the quotient is a valid two-digit value for __append2 -- confirm at the
// call sites.
|
||||
template <class _Tp>
|
||||
_LIBCPP_HIDE_FROM_ABI char* __append10(char* __first, _Tp __value) noexcept {
|
||||
return __itoa::__append8(__itoa::__append2(__first, static_cast<uint32_t>(__value / 100000000)),
|
||||
static_cast<uint32_t>(__value % 100000000));
|
||||
}
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u32(char* __first, uint32_t __value) noexcept {
|
||||
if (__value < 1000000) {
|
||||
if (__value < 10000) {
|
||||
if (__value < 100) {
|
||||
// 0 <= __value < 100
|
||||
if (__value < 10)
|
||||
return __itoa::__append1(__first, __value);
|
||||
return __itoa::__append2(__first, __value);
|
||||
}
|
||||
// 100 <= __value < 10'000
|
||||
if (__value < 1000)
|
||||
return __itoa::__append3(__first, __value);
|
||||
return __itoa::__append4(__first, __value);
|
||||
}
|
||||
|
||||
// 10'000 <= __value < 1'000'000
|
||||
if (__value < 100000)
|
||||
return __itoa::__append5(__first, __value);
|
||||
return __itoa::__append6(__first, __value);
|
||||
}
|
||||
|
||||
return __buffer;
|
||||
}
|
||||
|
||||
_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u64(uint64_t __value, char* __buffer) noexcept {
|
||||
if (__value < 100000000)
|
||||
__buffer = __itoa::__append8_no_zeros(__buffer, static_cast<uint32_t>(__value));
|
||||
else if (__value < 10000000000000000) {
|
||||
const uint32_t __v0 = static_cast<uint32_t>(__value / 100000000);
|
||||
const uint32_t __v1 = static_cast<uint32_t>(__value % 100000000);
|
||||
|
||||
__buffer = __itoa::__append8_no_zeros(__buffer, __v0);
|
||||
__buffer = __itoa::__append4(__buffer, __v1 / 10000);
|
||||
__buffer = __itoa::__append4(__buffer, __v1 % 10000);
|
||||
} else {
|
||||
const uint32_t __a = static_cast<uint32_t>(__value / 10000000000000000); // 1 to 1844
|
||||
__value %= 10000000000000000;
|
||||
|
||||
__buffer = __itoa::__append4_no_zeros(__buffer, __a);
|
||||
|
||||
const uint32_t __v0 = static_cast<uint32_t>(__value / 100000000);
|
||||
const uint32_t __v1 = static_cast<uint32_t>(__value % 100000000);
|
||||
__buffer = __itoa::__append4(__buffer, __v0 / 10000);
|
||||
__buffer = __itoa::__append4(__buffer, __v0 % 10000);
|
||||
__buffer = __itoa::__append4(__buffer, __v1 / 10000);
|
||||
__buffer = __itoa::__append4(__buffer, __v1 % 10000);
|
||||
// __value >= 1'000'000
|
||||
if (__value < 100000000) {
|
||||
// 1'000'000 <= __value < 100'000'000
|
||||
if (__value < 10000000)
|
||||
return __itoa::__append7(__first, __value);
|
||||
return __itoa::__append8(__first, __value);
|
||||
}
|
||||
|
||||
return __buffer;
|
||||
// 100'000'000 <= __value < max
|
||||
if (__value < 1000000000)
|
||||
return __itoa::__append9(__first, __value);
|
||||
return __itoa::__append10(__first, __value);
|
||||
}
|
||||
|
||||
// Writes the decimal digits of the 64-bit __value starting at __buffer and
// returns the pointer handed back by the last helper that was called.
//
// Values that fit in 32 bits are delegated to __base_10_u32. Larger values are
// split at 10^10: a non-zero quotient is written by __base_10_u32 and the
// remaining ten digits are written by __append10.
// NOTE(review): no bounds check is visible in this function; presumably the
// caller guarantees the buffer is large enough -- confirm against the callers.
_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u64(char* __buffer, uint64_t __value) noexcept {
|
||||
if (__value <= UINT32_MAX)
|
||||
return __itoa::__base_10_u32(__buffer, static_cast<uint32_t>(__value));
|
||||
|
||||
// Numbers in the range UINT32_MAX < __value < 10'000'000'000 always contain 10
|
||||
// digits and are written by the __append10 call after this if statement.
|
||||
if (__value >= 10000000000) {
|
||||
// __base_10_u32 determines the first non-zero leading digit. The quotient is
// at most 1'844'674'407 for a 64-bit input, so the cast cannot truncate.
|
||||
__buffer = __itoa::__base_10_u32(__buffer, static_cast<uint32_t>(__value / 10000000000));
|
||||
__value %= 10000000000;
|
||||
}
|
||||
return __itoa::__append10(__buffer, __value);
|
||||
}
|
||||
|
||||
} // namespace __itoa
|
||||
|
|
|
@ -125,9 +125,9 @@ struct _LIBCPP_HIDDEN __traits_base
|
|||
return __t - (__v < __table<>::__pow10_64[__t]) + 1;
|
||||
}
|
||||
|
||||
static _LIBCPP_HIDE_FROM_ABI char* __convert(_Tp __v, char* __p)
|
||||
static _LIBCPP_HIDE_FROM_ABI char* __convert(char* __p, _Tp __v)
|
||||
{
|
||||
return __itoa::__base_10_u64(__v, __p);
|
||||
return __itoa::__base_10_u64(__p, __v);
|
||||
}
|
||||
|
||||
static _LIBCPP_HIDE_FROM_ABI decltype(__table<>::__pow10_64)& __pow() { return __table<>::__pow10_64; }
|
||||
|
@ -145,9 +145,9 @@ struct _LIBCPP_HIDDEN
|
|||
return __t - (__v < __table<>::__pow10_32[__t]) + 1;
|
||||
}
|
||||
|
||||
static _LIBCPP_HIDE_FROM_ABI char* __convert(_Tp __v, char* __p)
|
||||
static _LIBCPP_HIDE_FROM_ABI char* __convert(char* __p, _Tp __v)
|
||||
{
|
||||
return __itoa::__base_10_u32(__v, __p);
|
||||
return __itoa::__base_10_u32(__p, __v);
|
||||
}
|
||||
|
||||
static _LIBCPP_HIDE_FROM_ABI decltype(__table<>::__pow10_32)& __pow() { return __table<>::__pow10_32; }
|
||||
|
@ -262,7 +262,7 @@ __to_chars_itoa(char* __first, char* __last, _Tp __value, false_type)
|
|||
auto __diff = __last - __first;
|
||||
|
||||
if (__tx::digits <= __diff || __tx::__width(__value) <= __diff)
|
||||
return {__tx::__convert(__value, __first), errc(0)};
|
||||
return {__tx::__convert(__first, __value), errc(0)};
|
||||
else
|
||||
return {__last, errc::value_too_large};
|
||||
}
|
||||
|
|
|
@ -21,13 +21,13 @@ namespace __itoa
|
|||
_LIBCPP_FUNC_VIS char*
|
||||
__u32toa(uint32_t value, char* buffer) noexcept
|
||||
{
|
||||
return __base_10_u32(value, buffer);
|
||||
return __base_10_u32(buffer, value);
|
||||
}
|
||||
|
||||
_LIBCPP_FUNC_VIS char*
|
||||
__u64toa(uint64_t value, char* buffer) noexcept
|
||||
{
|
||||
return __base_10_u64(value, buffer);
|
||||
return __base_10_u64(buffer, value);
|
||||
}
|
||||
|
||||
} // namespace __itoa
|
||||
|
|
Loading…
Reference in New Issue