From 3561ee586ed0ec8909527c09020b42bd09718da1 Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Mon, 16 May 2022 07:42:40 +0200 Subject: [PATCH] [libc++] Improve charconv base10 algorithm. This change is a preparation to add the 128-bit integral output. Before ``` -------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------- BM_to_chars_good/2 20.1 ns 20.1 ns 35045000 BM_to_chars_good/3 117 ns 117 ns 5916000 BM_to_chars_good/4 83.7 ns 83.7 ns 8401000 BM_to_chars_good/5 70.6 ns 70.6 ns 9915000 BM_to_chars_good/6 59.9 ns 59.9 ns 11678000 BM_to_chars_good/7 53.9 ns 53.8 ns 12995000 BM_to_chars_good/8 19.0 ns 19.0 ns 37110000 BM_to_chars_good/9 45.9 ns 45.8 ns 15278000 BM_to_chars_good/10 9.24 ns 9.24 ns 75343000 BM_to_chars_good/11 42.6 ns 42.6 ns 16449000 BM_to_chars_good/12 38.8 ns 38.8 ns 18101000 BM_to_chars_good/13 38.8 ns 38.8 ns 17999000 BM_to_chars_good/14 37.7 ns 37.6 ns 18571000 BM_to_chars_good/15 35.8 ns 35.8 ns 19660000 BM_to_chars_good/16 15.4 ns 15.4 ns 46129000 BM_to_chars_good/17 32.3 ns 32.3 ns 21763000 BM_to_chars_good/18 32.8 ns 32.8 ns 21396000 BM_to_chars_good/19 33.4 ns 33.4 ns 21078000 BM_to_chars_good/20 33.3 ns 33.3 ns 21020000 BM_to_chars_good/21 32.3 ns 32.3 ns 21807000 BM_to_chars_good/22 31.6 ns 31.6 ns 22057000 BM_to_chars_good/23 30.7 ns 30.7 ns 22938000 BM_to_chars_good/24 28.3 ns 28.3 ns 24659000 BM_to_chars_good/25 28.2 ns 28.2 ns 24790000 BM_to_chars_good/26 28.4 ns 28.4 ns 24410000 BM_to_chars_good/27 28.7 ns 28.7 ns 24423000 BM_to_chars_good/28 28.9 ns 28.9 ns 24139000 BM_to_chars_good/29 28.9 ns 28.9 ns 24347000 BM_to_chars_good/30 29.2 ns 29.2 ns 24141000 BM_to_chars_good/31 29.6 ns 29.6 ns 23699000 BM_to_chars_good/32 29.5 ns 29.5 ns 23933000 BM_to_chars_good/33 28.9 ns 28.9 ns 24042000 BM_to_chars_good/34 28.7 ns 28.7 ns 24361000 BM_to_chars_good/35 28.3 ns 28.3 ns 24703000 BM_to_chars_good/36 28.1 ns 28.1 ns 24924000 BM_to_chars_bad/2 6.16 ns 6.15 ns 114101000 BM_to_chars_bad/3 14.5 ns 14.5 ns 48244000 BM_to_chars_bad/4 16.9 ns 16.9 ns 41974000 BM_to_chars_bad/5 12.5 ns 12.5 ns 56080000 BM_to_chars_bad/6 10.9 ns 10.9 ns 64036000 BM_to_chars_bad/7 14.5 ns 14.5 ns 47294000 BM_to_chars_bad/8 6.36 ns 6.35 ns 110430000 BM_to_chars_bad/9 12.4 ns 12.4 ns 56448000 BM_to_chars_bad/10 5.13 ns 5.13 ns 137596000 BM_to_chars_bad/11 9.88 ns 9.88 ns 69015000 BM_to_chars_bad/12 10.8 ns 10.8 ns 63990000 BM_to_chars_bad/13 10.7 ns 10.7 ns 65066000 BM_to_chars_bad/14 9.71 ns 9.71 ns 71775000 BM_to_chars_bad/15 9.18 ns 9.18 ns 75267000 BM_to_chars_bad/16 6.12 ns 6.12 ns 115000000 BM_to_chars_bad/17 10.7 ns 10.7 ns 65504000 BM_to_chars_bad/18 10.6 ns 10.6 ns 65685000 BM_to_chars_bad/19 9.98 ns 9.98 ns 69894000 BM_to_chars_bad/20 9.74 ns 9.74 ns 72098000 BM_to_chars_bad/21 9.25 ns 9.25 ns 75184000 BM_to_chars_bad/22 9.10 ns 9.10 ns 75602000 BM_to_chars_bad/23 9.48 ns 9.48 ns 72824000 BM_to_chars_bad/24 9.27 ns 9.27 ns 75112000 BM_to_chars_bad/25 9.61 ns 9.61 ns 72080000 BM_to_chars_bad/26 9.72 ns 9.72 ns 72178000 BM_to_chars_bad/27 10.0 ns 10.0 ns 69733000 BM_to_chars_bad/28 10.3 ns 10.3 ns 67409000 BM_to_chars_bad/29 9.97 ns 9.97 ns 69193000 BM_to_chars_bad/30 10.1 ns 10.1 ns 69007000 BM_to_chars_bad/31 9.68 ns 9.68 ns 72232000 BM_to_chars_bad/32 8.99 ns 8.99 ns 76825000 BM_to_chars_bad/33 8.82 ns 8.82 ns 79293000 BM_to_chars_bad/34 8.64 ns 8.64 ns 80441000 BM_to_chars_bad/35 8.96 ns 8.96 ns 75320000 BM_to_chars_bad/36 8.87 ns 8.87 ns 77293000 ``` After ``` -------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------- BM_to_chars_good/2 14.7 ns 14.7 ns 47583000 BM_to_chars_good/3 101 ns 101 ns 6901000 BM_to_chars_good/4 68.4 ns 68.4 ns 10088000 BM_to_chars_good/5 58.2 ns 58.2 ns 12007000 BM_to_chars_good/6 51.1 ns 51.1 ns 13687000 BM_to_chars_good/7 45.6 ns 45.6 ns 15323000 BM_to_chars_good/8 14.6 ns 14.6 ns 47795000 BM_to_chars_good/9 40.7 ns 40.7 ns 17371000 BM_to_chars_good/10 7.48 ns 7.48 ns 90931000 BM_to_chars_good/11 37.6 ns 37.6 ns 18542000 BM_to_chars_good/12 35.2 ns 35.2 ns 19922000 BM_to_chars_good/13 34.9 ns 34.9 ns 20105000 BM_to_chars_good/14 33.5 ns 33.5 ns 20863000 BM_to_chars_good/15 31.9 ns 31.9 ns 22014000 BM_to_chars_good/16 11.7 ns 11.7 ns 60012000 BM_to_chars_good/17 28.9 ns 28.9 ns 24148000 BM_to_chars_good/18 29.0 ns 29.0 ns 24317000 BM_to_chars_good/19 28.7 ns 28.7 ns 24363000 BM_to_chars_good/20 28.1 ns 28.1 ns 24899000 BM_to_chars_good/21 27.5 ns 27.5 ns 25499000 BM_to_chars_good/22 26.9 ns 26.9 ns 25929000 BM_to_chars_good/23 26.2 ns 26.2 ns 26828000 BM_to_chars_good/24 25.1 ns 25.1 ns 27742000 BM_to_chars_good/25 25.3 ns 25.3 ns 27720000 BM_to_chars_good/26 25.2 ns 25.2 ns 27789000 BM_to_chars_good/27 25.3 ns 25.3 ns 27777000 BM_to_chars_good/28 25.3 ns 25.3 ns 27643000 BM_to_chars_good/29 25.3 ns 25.3 ns 27750000 BM_to_chars_good/30 25.4 ns 25.4 ns 27566000 BM_to_chars_good/31 25.4 ns 25.4 ns 27611000 BM_to_chars_good/32 25.8 ns 25.8 ns 27218000 BM_to_chars_good/33 25.7 ns 25.7 ns 27070000 BM_to_chars_good/34 26.1 ns 26.1 ns 26693000 BM_to_chars_good/35 26.4 ns 26.4 ns 26486000 BM_to_chars_good/36 26.3 ns 26.3 ns 26619000 BM_to_chars_bad/2 5.99 ns 5.99 ns 118787000 BM_to_chars_bad/3 14.3 ns 14.3 ns 48567000 BM_to_chars_bad/4 16.0 ns 16.0 ns 43239000 BM_to_chars_bad/5 12.6 ns 12.6 ns 55354000 BM_to_chars_bad/6 10.7 ns 10.7 ns 65491000 BM_to_chars_bad/7 14.4 ns 14.4 ns 48723000 BM_to_chars_bad/8 6.50 ns 6.50 ns 104967000 BM_to_chars_bad/9 12.0 ns 12.0 ns 56552000 BM_to_chars_bad/10 5.16 ns 5.16 ns 136380000 BM_to_chars_bad/11 10.5 ns 10.5 ns 66764000 BM_to_chars_bad/12 10.7 ns 10.7 ns 65534000 BM_to_chars_bad/13 11.0 ns 11.0 ns 63426000 BM_to_chars_bad/14 9.90 ns 9.90 ns 68575000 BM_to_chars_bad/15 9.52 ns 9.52 ns 70932000 BM_to_chars_bad/16 6.14 ns 6.14 ns 111762000 BM_to_chars_bad/17 10.6 ns 10.6 ns 65883000 BM_to_chars_bad/18 10.5 ns 10.5 ns 67606000 BM_to_chars_bad/19 9.96 ns 9.96 ns 68898000 BM_to_chars_bad/20 9.40 ns 9.41 ns 73116000 BM_to_chars_bad/21 9.12 ns 9.12 ns 78647000 BM_to_chars_bad/22 8.95 ns 8.95 ns 80211000 BM_to_chars_bad/23 9.50 ns 9.49 ns 73571000 BM_to_chars_bad/24 9.29 ns 9.29 ns 74690000 BM_to_chars_bad/25 9.65 ns 9.65 ns 72877000 BM_to_chars_bad/26 9.78 ns 9.78 ns 70171000 BM_to_chars_bad/27 10.1 ns 10.1 ns 69543000 BM_to_chars_bad/28 10.4 ns 10.4 ns 67582000 BM_to_chars_bad/29 10.00 ns 10.00 ns 70806000 BM_to_chars_bad/30 9.99 ns 9.99 ns 70340000 BM_to_chars_bad/31 9.56 ns 9.56 ns 74159000 BM_to_chars_bad/32 8.97 ns 8.97 ns 78052000 BM_to_chars_bad/33 8.86 ns 8.86 ns 78586000 BM_to_chars_bad/34 8.81 ns 8.81 ns 78562000 BM_to_chars_bad/35 8.90 ns 8.90 ns 77384000 BM_to_chars_bad/36 9.04 ns 9.04 ns 77263000 ``` Reviewed By: #libc, ldionne Differential Revision: https://reviews.llvm.org/D127764 --- libcxx/include/__charconv/to_chars_base_10.h | 147 +++++++++---------- libcxx/include/charconv | 10 +- libcxx/src/charconv.cpp | 4 +- 3 files changed, 80 insertions(+), 81 deletions(-) diff --git a/libcxx/include/__charconv/to_chars_base_10.h b/libcxx/include/__charconv/to_chars_base_10.h index 1c6804a85246..91c209559aff 100644 --- a/libcxx/include/__charconv/to_chars_base_10.h +++ b/libcxx/include/__charconv/to_chars_base_10.h @@ -10,10 +10,10 @@ #ifndef _LIBCPP___CHARCONV_TO_CHARS_BASE_10_H #define _LIBCPP___CHARCONV_TO_CHARS_BASE_10_H +#include <__algorithm/copy_n.h> #include <__charconv/tables.h> #include <__config> #include -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -25,98 +25,97 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace __itoa { -template -_LIBCPP_HIDE_FROM_ABI char* __append1(char* __buffer, _Tp __value) noexcept { - *__buffer = '0' + static_cast(__value); - return __buffer + 1; +_LIBCPP_HIDE_FROM_ABI inline char* __append1(char* __first, uint32_t __value) noexcept { + *__first = '0' + static_cast(__value); + return __first + 1; } -template -_LIBCPP_HIDE_FROM_ABI char* __append2(char* __buffer, _Tp __value) noexcept { - std::memcpy(__buffer, &__table<>::__digits_base_10[(__value)*2], 2); - return __buffer + 2; +_LIBCPP_HIDE_FROM_ABI inline char* __append2(char* __first, uint32_t __value) noexcept { + return std::copy_n(&__table<>::__digits_base_10[__value * 2], 2, __first); } -template -_LIBCPP_HIDE_FROM_ABI char* __append3(char* __buffer, _Tp __value) noexcept { - return __itoa::__append2(__itoa::__append1(__buffer, (__value) / 100), (__value) % 100); +_LIBCPP_HIDE_FROM_ABI inline char* __append3(char* __first, uint32_t __value) noexcept { + return __itoa::__append2(__itoa::__append1(__first, __value / 100), __value % 100); } -template -_LIBCPP_HIDE_FROM_ABI char* __append4(char* __buffer, _Tp __value) noexcept { - return __itoa::__append2(__itoa::__append2(__buffer, (__value) / 100), (__value) % 100); +_LIBCPP_HIDE_FROM_ABI inline char* __append4(char* __first, uint32_t __value) noexcept { + return __itoa::__append2(__itoa::__append2(__first, __value / 100), __value % 100); } -template -_LIBCPP_HIDE_FROM_ABI char* __append2_no_zeros(char* __buffer, _Tp __value) noexcept { - if (__value < 10) - return __itoa::__append1(__buffer, __value); - else - return __itoa::__append2(__buffer, __value); +_LIBCPP_HIDE_FROM_ABI inline char* __append5(char* __first, uint32_t __value) noexcept { + return __itoa::__append4(__itoa::__append1(__first, __value / 10000), __value % 10000); } -template -_LIBCPP_HIDE_FROM_ABI char* __append4_no_zeros(char* __buffer, _Tp __value) noexcept { - if (__value < 100) - return __itoa::__append2_no_zeros(__buffer, __value); - else if (__value < 1000) - return __itoa::__append3(__buffer, __value); - else - return __itoa::__append4(__buffer, __value); +_LIBCPP_HIDE_FROM_ABI inline char* __append6(char* __first, uint32_t __value) noexcept { + return __itoa::__append4(__itoa::__append2(__first, __value / 10000), __value % 10000); } -template -_LIBCPP_HIDE_FROM_ABI char* __append8_no_zeros(char* __buffer, _Tp __value) noexcept { - if (__value < 10000) - __buffer = __itoa::__append4_no_zeros(__buffer, __value); - else { - __buffer = __itoa::__append4_no_zeros(__buffer, __value / 10000); - __buffer = __itoa::__append4(__buffer, __value % 10000); - } - return __buffer; +_LIBCPP_HIDE_FROM_ABI inline char* __append7(char* __first, uint32_t __value) noexcept { + return __itoa::__append6(__itoa::__append1(__first, __value / 1000000), __value % 1000000); } -_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u32(uint32_t __value, char* __buffer) noexcept { - if (__value < 100000000) - __buffer = __itoa::__append8_no_zeros(__buffer, __value); - else { - // __value = aabbbbcccc in decimal - const uint32_t __a = __value / 100000000; // 1 to 42 - __value %= 100000000; +_LIBCPP_HIDE_FROM_ABI inline char* __append8(char* __first, uint32_t __value) noexcept { + return __itoa::__append6(__itoa::__append2(__first, __value / 1000000), __value % 1000000); +} - __buffer = __itoa::__append2_no_zeros(__buffer, __a); - __buffer = __itoa::__append4(__buffer, __value / 10000); - __buffer = __itoa::__append4(__buffer, __value % 10000); +_LIBCPP_HIDE_FROM_ABI inline char* __append9(char* __first, uint32_t __value) noexcept { + return __itoa::__append8(__itoa::__append1(__first, __value / 100000000), __value % 100000000); +} + +// This function is used for uint32_t and uint64_t. +template +_LIBCPP_HIDE_FROM_ABI char* __append10(char* __first, _Tp __value) noexcept { + return __itoa::__append8(__itoa::__append2(__first, static_cast(__value / 100000000)), + static_cast(__value % 100000000)); +} + +_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u32(char* __first, uint32_t __value) noexcept { + if (__value < 1000000) { + if (__value < 10000) { + if (__value < 100) { + // 0 <= __value < 100 + if (__value < 10) + return __itoa::__append1(__first, __value); + return __itoa::__append2(__first, __value); + } + // 100 <= __value < 10'000 + if (__value < 1000) + return __itoa::__append3(__first, __value); + return __itoa::__append4(__first, __value); + } + + // 10'000 <= __value < 1'000'000 + if (__value < 100000) + return __itoa::__append5(__first, __value); + return __itoa::__append6(__first, __value); } - return __buffer; -} - -_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u64(uint64_t __value, char* __buffer) noexcept { - if (__value < 100000000) - __buffer = __itoa::__append8_no_zeros(__buffer, static_cast(__value)); - else if (__value < 10000000000000000) { - const uint32_t __v0 = static_cast(__value / 100000000); - const uint32_t __v1 = static_cast(__value % 100000000); - - __buffer = __itoa::__append8_no_zeros(__buffer, __v0); - __buffer = __itoa::__append4(__buffer, __v1 / 10000); - __buffer = __itoa::__append4(__buffer, __v1 % 10000); - } else { - const uint32_t __a = static_cast(__value / 10000000000000000); // 1 to 1844 - __value %= 10000000000000000; - - __buffer = __itoa::__append4_no_zeros(__buffer, __a); - - const uint32_t __v0 = static_cast(__value / 100000000); - const uint32_t __v1 = static_cast(__value % 100000000); - __buffer = __itoa::__append4(__buffer, __v0 / 10000); - __buffer = __itoa::__append4(__buffer, __v0 % 10000); - __buffer = __itoa::__append4(__buffer, __v1 / 10000); - __buffer = __itoa::__append4(__buffer, __v1 % 10000); + // __value => 1'000'000 + if (__value < 100000000) { + // 1'000'000 <= __value < 100'000'000 + if (__value < 10000000) + return __itoa::__append7(__first, __value); + return __itoa::__append8(__first, __value); } - return __buffer; + // 100'000'000 <= __value < max + if (__value < 1000000000) + return __itoa::__append9(__first, __value); + return __itoa::__append10(__first, __value); +} + +_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u64(char* __buffer, uint64_t __value) noexcept { + if (__value <= UINT32_MAX) + return __itoa::__base_10_u32(__buffer, static_cast(__value)); + + // Numbers in the range UINT32_MAX <= val < 10'000'000'000 always contain 10 + // digits and are outputted after this if statement. + if (__value >= 10000000000) { + // This function properly deterimines the first non-zero leading digit. + __buffer = __itoa::__base_10_u32(__buffer, static_cast(__value / 10000000000)); + __value %= 10000000000; + } + return __itoa::__append10(__buffer, __value); } } // namespace __itoa diff --git a/libcxx/include/charconv b/libcxx/include/charconv index ce2897cf7c28..b88ef46752a3 100644 --- a/libcxx/include/charconv +++ b/libcxx/include/charconv @@ -125,9 +125,9 @@ struct _LIBCPP_HIDDEN __traits_base return __t - (__v < __table<>::__pow10_64[__t]) + 1; } - static _LIBCPP_HIDE_FROM_ABI char* __convert(_Tp __v, char* __p) + static _LIBCPP_HIDE_FROM_ABI char* __convert(char* __p, _Tp __v) { - return __itoa::__base_10_u64(__v, __p); + return __itoa::__base_10_u64(__p, __v); } static _LIBCPP_HIDE_FROM_ABI decltype(__table<>::__pow10_64)& __pow() { return __table<>::__pow10_64; } @@ -145,9 +145,9 @@ struct _LIBCPP_HIDDEN return __t - (__v < __table<>::__pow10_32[__t]) + 1; } - static _LIBCPP_HIDE_FROM_ABI char* __convert(_Tp __v, char* __p) + static _LIBCPP_HIDE_FROM_ABI char* __convert(char* __p, _Tp __v) { - return __itoa::__base_10_u32(__v, __p); + return __itoa::__base_10_u32(__p, __v); } static _LIBCPP_HIDE_FROM_ABI decltype(__table<>::__pow10_32)& __pow() { return __table<>::__pow10_32; } @@ -262,7 +262,7 @@ __to_chars_itoa(char* __first, char* __last, _Tp __value, false_type) auto __diff = __last - __first; if (__tx::digits <= __diff || __tx::__width(__value) <= __diff) - return {__tx::__convert(__value, __first), errc(0)}; + return {__tx::__convert(__first, __value), errc(0)}; else return {__last, errc::value_too_large}; } diff --git a/libcxx/src/charconv.cpp b/libcxx/src/charconv.cpp index 4139dea5f3aa..05ee2cbd4ac8 100644 --- a/libcxx/src/charconv.cpp +++ b/libcxx/src/charconv.cpp @@ -21,13 +21,13 @@ namespace __itoa _LIBCPP_FUNC_VIS char* __u32toa(uint32_t value, char* buffer) noexcept { - return __base_10_u32(value, buffer); + return __base_10_u32(buffer, value); } _LIBCPP_FUNC_VIS char* __u64toa(uint64_t value, char* buffer) noexcept { - return __base_10_u64(value, buffer); + return __base_10_u64(buffer, value); } } // namespace __itoa