[libc++] Improve charconv base10 algorithm.

This change is a preparation to add the 128-bit integral output.

Before
```
--------------------------------------------------------------
Benchmark                    Time             CPU   Iterations
--------------------------------------------------------------
BM_to_chars_good/2        20.1 ns         20.1 ns     35045000
BM_to_chars_good/3         117 ns          117 ns      5916000
BM_to_chars_good/4        83.7 ns         83.7 ns      8401000
BM_to_chars_good/5        70.6 ns         70.6 ns      9915000
BM_to_chars_good/6        59.9 ns         59.9 ns     11678000
BM_to_chars_good/7        53.9 ns         53.8 ns     12995000
BM_to_chars_good/8        19.0 ns         19.0 ns     37110000
BM_to_chars_good/9        45.9 ns         45.8 ns     15278000
BM_to_chars_good/10       9.24 ns         9.24 ns     75343000
BM_to_chars_good/11       42.6 ns         42.6 ns     16449000
BM_to_chars_good/12       38.8 ns         38.8 ns     18101000
BM_to_chars_good/13       38.8 ns         38.8 ns     17999000
BM_to_chars_good/14       37.7 ns         37.6 ns     18571000
BM_to_chars_good/15       35.8 ns         35.8 ns     19660000
BM_to_chars_good/16       15.4 ns         15.4 ns     46129000
BM_to_chars_good/17       32.3 ns         32.3 ns     21763000
BM_to_chars_good/18       32.8 ns         32.8 ns     21396000
BM_to_chars_good/19       33.4 ns         33.4 ns     21078000
BM_to_chars_good/20       33.3 ns         33.3 ns     21020000
BM_to_chars_good/21       32.3 ns         32.3 ns     21807000
BM_to_chars_good/22       31.6 ns         31.6 ns     22057000
BM_to_chars_good/23       30.7 ns         30.7 ns     22938000
BM_to_chars_good/24       28.3 ns         28.3 ns     24659000
BM_to_chars_good/25       28.2 ns         28.2 ns     24790000
BM_to_chars_good/26       28.4 ns         28.4 ns     24410000
BM_to_chars_good/27       28.7 ns         28.7 ns     24423000
BM_to_chars_good/28       28.9 ns         28.9 ns     24139000
BM_to_chars_good/29       28.9 ns         28.9 ns     24347000
BM_to_chars_good/30       29.2 ns         29.2 ns     24141000
BM_to_chars_good/31       29.6 ns         29.6 ns     23699000
BM_to_chars_good/32       29.5 ns         29.5 ns     23933000
BM_to_chars_good/33       28.9 ns         28.9 ns     24042000
BM_to_chars_good/34       28.7 ns         28.7 ns     24361000
BM_to_chars_good/35       28.3 ns         28.3 ns     24703000
BM_to_chars_good/36       28.1 ns         28.1 ns     24924000
BM_to_chars_bad/2         6.16 ns         6.15 ns    114101000
BM_to_chars_bad/3         14.5 ns         14.5 ns     48244000
BM_to_chars_bad/4         16.9 ns         16.9 ns     41974000
BM_to_chars_bad/5         12.5 ns         12.5 ns     56080000
BM_to_chars_bad/6         10.9 ns         10.9 ns     64036000
BM_to_chars_bad/7         14.5 ns         14.5 ns     47294000
BM_to_chars_bad/8         6.36 ns         6.35 ns    110430000
BM_to_chars_bad/9         12.4 ns         12.4 ns     56448000
BM_to_chars_bad/10        5.13 ns         5.13 ns    137596000
BM_to_chars_bad/11        9.88 ns         9.88 ns     69015000
BM_to_chars_bad/12        10.8 ns         10.8 ns     63990000
BM_to_chars_bad/13        10.7 ns         10.7 ns     65066000
BM_to_chars_bad/14        9.71 ns         9.71 ns     71775000
BM_to_chars_bad/15        9.18 ns         9.18 ns     75267000
BM_to_chars_bad/16        6.12 ns         6.12 ns    115000000
BM_to_chars_bad/17        10.7 ns         10.7 ns     65504000
BM_to_chars_bad/18        10.6 ns         10.6 ns     65685000
BM_to_chars_bad/19        9.98 ns         9.98 ns     69894000
BM_to_chars_bad/20        9.74 ns         9.74 ns     72098000
BM_to_chars_bad/21        9.25 ns         9.25 ns     75184000
BM_to_chars_bad/22        9.10 ns         9.10 ns     75602000
BM_to_chars_bad/23        9.48 ns         9.48 ns     72824000
BM_to_chars_bad/24        9.27 ns         9.27 ns     75112000
BM_to_chars_bad/25        9.61 ns         9.61 ns     72080000
BM_to_chars_bad/26        9.72 ns         9.72 ns     72178000
BM_to_chars_bad/27        10.0 ns         10.0 ns     69733000
BM_to_chars_bad/28        10.3 ns         10.3 ns     67409000
BM_to_chars_bad/29        9.97 ns         9.97 ns     69193000
BM_to_chars_bad/30        10.1 ns         10.1 ns     69007000
BM_to_chars_bad/31        9.68 ns         9.68 ns     72232000
BM_to_chars_bad/32        8.99 ns         8.99 ns     76825000
BM_to_chars_bad/33        8.82 ns         8.82 ns     79293000
BM_to_chars_bad/34        8.64 ns         8.64 ns     80441000
BM_to_chars_bad/35        8.96 ns         8.96 ns     75320000
BM_to_chars_bad/36        8.87 ns         8.87 ns     77293000

```

After
```
--------------------------------------------------------------
Benchmark                    Time             CPU   Iterations
--------------------------------------------------------------
BM_to_chars_good/2        14.7 ns         14.7 ns     47583000
BM_to_chars_good/3         101 ns          101 ns      6901000
BM_to_chars_good/4        68.4 ns         68.4 ns     10088000
BM_to_chars_good/5        58.2 ns         58.2 ns     12007000
BM_to_chars_good/6        51.1 ns         51.1 ns     13687000
BM_to_chars_good/7        45.6 ns         45.6 ns     15323000
BM_to_chars_good/8        14.6 ns         14.6 ns     47795000
BM_to_chars_good/9        40.7 ns         40.7 ns     17371000
BM_to_chars_good/10       7.48 ns         7.48 ns     90931000
BM_to_chars_good/11       37.6 ns         37.6 ns     18542000
BM_to_chars_good/12       35.2 ns         35.2 ns     19922000
BM_to_chars_good/13       34.9 ns         34.9 ns     20105000
BM_to_chars_good/14       33.5 ns         33.5 ns     20863000
BM_to_chars_good/15       31.9 ns         31.9 ns     22014000
BM_to_chars_good/16       11.7 ns         11.7 ns     60012000
BM_to_chars_good/17       28.9 ns         28.9 ns     24148000
BM_to_chars_good/18       29.0 ns         29.0 ns     24317000
BM_to_chars_good/19       28.7 ns         28.7 ns     24363000
BM_to_chars_good/20       28.1 ns         28.1 ns     24899000
BM_to_chars_good/21       27.5 ns         27.5 ns     25499000
BM_to_chars_good/22       26.9 ns         26.9 ns     25929000
BM_to_chars_good/23       26.2 ns         26.2 ns     26828000
BM_to_chars_good/24       25.1 ns         25.1 ns     27742000
BM_to_chars_good/25       25.3 ns         25.3 ns     27720000
BM_to_chars_good/26       25.2 ns         25.2 ns     27789000
BM_to_chars_good/27       25.3 ns         25.3 ns     27777000
BM_to_chars_good/28       25.3 ns         25.3 ns     27643000
BM_to_chars_good/29       25.3 ns         25.3 ns     27750000
BM_to_chars_good/30       25.4 ns         25.4 ns     27566000
BM_to_chars_good/31       25.4 ns         25.4 ns     27611000
BM_to_chars_good/32       25.8 ns         25.8 ns     27218000
BM_to_chars_good/33       25.7 ns         25.7 ns     27070000
BM_to_chars_good/34       26.1 ns         26.1 ns     26693000
BM_to_chars_good/35       26.4 ns         26.4 ns     26486000
BM_to_chars_good/36       26.3 ns         26.3 ns     26619000
BM_to_chars_bad/2         5.99 ns         5.99 ns    118787000
BM_to_chars_bad/3         14.3 ns         14.3 ns     48567000
BM_to_chars_bad/4         16.0 ns         16.0 ns     43239000
BM_to_chars_bad/5         12.6 ns         12.6 ns     55354000
BM_to_chars_bad/6         10.7 ns         10.7 ns     65491000
BM_to_chars_bad/7         14.4 ns         14.4 ns     48723000
BM_to_chars_bad/8         6.50 ns         6.50 ns    104967000
BM_to_chars_bad/9         12.0 ns         12.0 ns     56552000
BM_to_chars_bad/10        5.16 ns         5.16 ns    136380000
BM_to_chars_bad/11        10.5 ns         10.5 ns     66764000
BM_to_chars_bad/12        10.7 ns         10.7 ns     65534000
BM_to_chars_bad/13        11.0 ns         11.0 ns     63426000
BM_to_chars_bad/14        9.90 ns         9.90 ns     68575000
BM_to_chars_bad/15        9.52 ns         9.52 ns     70932000
BM_to_chars_bad/16        6.14 ns         6.14 ns    111762000
BM_to_chars_bad/17        10.6 ns         10.6 ns     65883000
BM_to_chars_bad/18        10.5 ns         10.5 ns     67606000
BM_to_chars_bad/19        9.96 ns         9.96 ns     68898000
BM_to_chars_bad/20        9.40 ns         9.41 ns     73116000
BM_to_chars_bad/21        9.12 ns         9.12 ns     78647000
BM_to_chars_bad/22        8.95 ns         8.95 ns     80211000
BM_to_chars_bad/23        9.50 ns         9.49 ns     73571000
BM_to_chars_bad/24        9.29 ns         9.29 ns     74690000
BM_to_chars_bad/25        9.65 ns         9.65 ns     72877000
BM_to_chars_bad/26        9.78 ns         9.78 ns     70171000
BM_to_chars_bad/27        10.1 ns         10.1 ns     69543000
BM_to_chars_bad/28        10.4 ns         10.4 ns     67582000
BM_to_chars_bad/29       10.00 ns        10.00 ns     70806000
BM_to_chars_bad/30        9.99 ns         9.99 ns     70340000
BM_to_chars_bad/31        9.56 ns         9.56 ns     74159000
BM_to_chars_bad/32        8.97 ns         8.97 ns     78052000
BM_to_chars_bad/33        8.86 ns         8.86 ns     78586000
BM_to_chars_bad/34        8.81 ns         8.81 ns     78562000
BM_to_chars_bad/35        8.90 ns         8.90 ns     77384000
BM_to_chars_bad/36        9.04 ns         9.04 ns     77263000

```

Reviewed By: #libc, ldionne

Differential Revision: https://reviews.llvm.org/D127764
This commit is contained in:
Mark de Wever 2022-05-16 07:42:40 +02:00
parent 5517bc6c4a
commit 3561ee586e
3 changed files with 80 additions and 81 deletions

View File

@ -10,10 +10,10 @@
#ifndef _LIBCPP___CHARCONV_TO_CHARS_BASE_10_H
#define _LIBCPP___CHARCONV_TO_CHARS_BASE_10_H
#include <__algorithm/copy_n.h>
#include <__charconv/tables.h>
#include <__config>
#include <cstdint>
#include <cstring>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
@ -25,98 +25,97 @@ _LIBCPP_BEGIN_NAMESPACE_STD
namespace __itoa {
template <class _Tp>
_LIBCPP_HIDE_FROM_ABI char* __append1(char* __buffer, _Tp __value) noexcept {
*__buffer = '0' + static_cast<char>(__value);
return __buffer + 1;
_LIBCPP_HIDE_FROM_ABI inline char* __append1(char* __first, uint32_t __value) noexcept {
*__first = '0' + static_cast<char>(__value);
return __first + 1;
}
template <class _Tp>
_LIBCPP_HIDE_FROM_ABI char* __append2(char* __buffer, _Tp __value) noexcept {
std::memcpy(__buffer, &__table<>::__digits_base_10[(__value)*2], 2);
return __buffer + 2;
_LIBCPP_HIDE_FROM_ABI inline char* __append2(char* __first, uint32_t __value) noexcept {
return std::copy_n(&__table<>::__digits_base_10[__value * 2], 2, __first);
}
template <class _Tp>
_LIBCPP_HIDE_FROM_ABI char* __append3(char* __buffer, _Tp __value) noexcept {
return __itoa::__append2(__itoa::__append1(__buffer, (__value) / 100), (__value) % 100);
_LIBCPP_HIDE_FROM_ABI inline char* __append3(char* __first, uint32_t __value) noexcept {
return __itoa::__append2(__itoa::__append1(__first, __value / 100), __value % 100);
}
template <class _Tp>
_LIBCPP_HIDE_FROM_ABI char* __append4(char* __buffer, _Tp __value) noexcept {
return __itoa::__append2(__itoa::__append2(__buffer, (__value) / 100), (__value) % 100);
_LIBCPP_HIDE_FROM_ABI inline char* __append4(char* __first, uint32_t __value) noexcept {
return __itoa::__append2(__itoa::__append2(__first, __value / 100), __value % 100);
}
template <class _Tp>
_LIBCPP_HIDE_FROM_ABI char* __append2_no_zeros(char* __buffer, _Tp __value) noexcept {
if (__value < 10)
return __itoa::__append1(__buffer, __value);
else
return __itoa::__append2(__buffer, __value);
_LIBCPP_HIDE_FROM_ABI inline char* __append5(char* __first, uint32_t __value) noexcept {
return __itoa::__append4(__itoa::__append1(__first, __value / 10000), __value % 10000);
}
template <class _Tp>
_LIBCPP_HIDE_FROM_ABI char* __append4_no_zeros(char* __buffer, _Tp __value) noexcept {
if (__value < 100)
return __itoa::__append2_no_zeros(__buffer, __value);
else if (__value < 1000)
return __itoa::__append3(__buffer, __value);
else
return __itoa::__append4(__buffer, __value);
_LIBCPP_HIDE_FROM_ABI inline char* __append6(char* __first, uint32_t __value) noexcept {
return __itoa::__append4(__itoa::__append2(__first, __value / 10000), __value % 10000);
}
template <class _Tp>
_LIBCPP_HIDE_FROM_ABI char* __append8_no_zeros(char* __buffer, _Tp __value) noexcept {
if (__value < 10000)
__buffer = __itoa::__append4_no_zeros(__buffer, __value);
else {
__buffer = __itoa::__append4_no_zeros(__buffer, __value / 10000);
__buffer = __itoa::__append4(__buffer, __value % 10000);
}
return __buffer;
_LIBCPP_HIDE_FROM_ABI inline char* __append7(char* __first, uint32_t __value) noexcept {
return __itoa::__append6(__itoa::__append1(__first, __value / 1000000), __value % 1000000);
}
_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u32(uint32_t __value, char* __buffer) noexcept {
if (__value < 100000000)
__buffer = __itoa::__append8_no_zeros(__buffer, __value);
else {
// __value = aabbbbcccc in decimal
const uint32_t __a = __value / 100000000; // 1 to 42
__value %= 100000000;
_LIBCPP_HIDE_FROM_ABI inline char* __append8(char* __first, uint32_t __value) noexcept {
return __itoa::__append6(__itoa::__append2(__first, __value / 1000000), __value % 1000000);
}
__buffer = __itoa::__append2_no_zeros(__buffer, __a);
__buffer = __itoa::__append4(__buffer, __value / 10000);
__buffer = __itoa::__append4(__buffer, __value % 10000);
_LIBCPP_HIDE_FROM_ABI inline char* __append9(char* __first, uint32_t __value) noexcept {
return __itoa::__append8(__itoa::__append1(__first, __value / 100000000), __value % 100000000);
}
// This function is used for uint32_t and uint64_t.
template <class _Tp>
_LIBCPP_HIDE_FROM_ABI char* __append10(char* __first, _Tp __value) noexcept {
return __itoa::__append8(__itoa::__append2(__first, static_cast<uint32_t>(__value / 100000000)),
static_cast<uint32_t>(__value % 100000000));
}
_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u32(char* __first, uint32_t __value) noexcept {
if (__value < 1000000) {
if (__value < 10000) {
if (__value < 100) {
// 0 <= __value < 100
if (__value < 10)
return __itoa::__append1(__first, __value);
return __itoa::__append2(__first, __value);
}
// 100 <= __value < 10'000
if (__value < 1000)
return __itoa::__append3(__first, __value);
return __itoa::__append4(__first, __value);
}
// 10'000 <= __value < 1'000'000
if (__value < 100000)
return __itoa::__append5(__first, __value);
return __itoa::__append6(__first, __value);
}
return __buffer;
}
_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u64(uint64_t __value, char* __buffer) noexcept {
if (__value < 100000000)
__buffer = __itoa::__append8_no_zeros(__buffer, static_cast<uint32_t>(__value));
else if (__value < 10000000000000000) {
const uint32_t __v0 = static_cast<uint32_t>(__value / 100000000);
const uint32_t __v1 = static_cast<uint32_t>(__value % 100000000);
__buffer = __itoa::__append8_no_zeros(__buffer, __v0);
__buffer = __itoa::__append4(__buffer, __v1 / 10000);
__buffer = __itoa::__append4(__buffer, __v1 % 10000);
} else {
const uint32_t __a = static_cast<uint32_t>(__value / 10000000000000000); // 1 to 1844
__value %= 10000000000000000;
__buffer = __itoa::__append4_no_zeros(__buffer, __a);
const uint32_t __v0 = static_cast<uint32_t>(__value / 100000000);
const uint32_t __v1 = static_cast<uint32_t>(__value % 100000000);
__buffer = __itoa::__append4(__buffer, __v0 / 10000);
__buffer = __itoa::__append4(__buffer, __v0 % 10000);
__buffer = __itoa::__append4(__buffer, __v1 / 10000);
__buffer = __itoa::__append4(__buffer, __v1 % 10000);
// __value => 1'000'000
if (__value < 100000000) {
// 1'000'000 <= __value < 100'000'000
if (__value < 10000000)
return __itoa::__append7(__first, __value);
return __itoa::__append8(__first, __value);
}
return __buffer;
// 100'000'000 <= __value < max
if (__value < 1000000000)
return __itoa::__append9(__first, __value);
return __itoa::__append10(__first, __value);
}
_LIBCPP_HIDE_FROM_ABI inline char* __base_10_u64(char* __buffer, uint64_t __value) noexcept {
if (__value <= UINT32_MAX)
return __itoa::__base_10_u32(__buffer, static_cast<uint32_t>(__value));
// Numbers in the range UINT32_MAX <= val < 10'000'000'000 always contain 10
// digits and are outputted after this if statement.
if (__value >= 10000000000) {
// This function properly deterimines the first non-zero leading digit.
__buffer = __itoa::__base_10_u32(__buffer, static_cast<uint32_t>(__value / 10000000000));
__value %= 10000000000;
}
return __itoa::__append10(__buffer, __value);
}
} // namespace __itoa

View File

@ -125,9 +125,9 @@ struct _LIBCPP_HIDDEN __traits_base
return __t - (__v < __table<>::__pow10_64[__t]) + 1;
}
static _LIBCPP_HIDE_FROM_ABI char* __convert(_Tp __v, char* __p)
static _LIBCPP_HIDE_FROM_ABI char* __convert(char* __p, _Tp __v)
{
return __itoa::__base_10_u64(__v, __p);
return __itoa::__base_10_u64(__p, __v);
}
static _LIBCPP_HIDE_FROM_ABI decltype(__table<>::__pow10_64)& __pow() { return __table<>::__pow10_64; }
@ -145,9 +145,9 @@ struct _LIBCPP_HIDDEN
return __t - (__v < __table<>::__pow10_32[__t]) + 1;
}
static _LIBCPP_HIDE_FROM_ABI char* __convert(_Tp __v, char* __p)
static _LIBCPP_HIDE_FROM_ABI char* __convert(char* __p, _Tp __v)
{
return __itoa::__base_10_u32(__v, __p);
return __itoa::__base_10_u32(__p, __v);
}
static _LIBCPP_HIDE_FROM_ABI decltype(__table<>::__pow10_32)& __pow() { return __table<>::__pow10_32; }
@ -262,7 +262,7 @@ __to_chars_itoa(char* __first, char* __last, _Tp __value, false_type)
auto __diff = __last - __first;
if (__tx::digits <= __diff || __tx::__width(__value) <= __diff)
return {__tx::__convert(__value, __first), errc(0)};
return {__tx::__convert(__first, __value), errc(0)};
else
return {__last, errc::value_too_large};
}

View File

@ -21,13 +21,13 @@ namespace __itoa
_LIBCPP_FUNC_VIS char*
__u32toa(uint32_t value, char* buffer) noexcept
{
return __base_10_u32(value, buffer);
return __base_10_u32(buffer, value);
}
_LIBCPP_FUNC_VIS char*
__u64toa(uint64_t value, char* buffer) noexcept
{
return __base_10_u64(value, buffer);
return __base_10_u64(buffer, value);
}
} // namespace __itoa