[libc++] Optimize / partially inline basic_string copy constructor

Splits the copy constructor in two: short-string initialization is inlined,
while long-string initialization is outlined into __init_long_external(), the
externally instantiated slow path.

Subsequently changing the copy ctor to be inlined (not externally instantiated)
provides significant speedups for short-string initialization.

Generated code given:

void StringCopyCtor(void* mem, const std::string& s) {
    std::string*p = new(mem) std::string{s};
}

asm:
        cmp     byte ptr [rsi + 23], 0
        js      .LBB0_2
        mov     rax, qword ptr [rsi + 16]
        mov     qword ptr [rdi + 16], rax
        movups  xmm0, xmmword ptr [rsi]
        movups  xmmword ptr [rdi], xmm0
        ret
.LBB0_2:
        jmp     std::basic_string::__init_long # TAILCALL

Benchmark:
BM_StringCopy_Empty                                           5.19ns ± 6%             1.50ns ± 8%  -71.02%        (p=0.000 n=10+10)
BM_StringCopy_Small                                           5.14ns ± 8%             1.53ns ± 7%  -70.17%        (p=0.000 n=10+10)
BM_StringCopy_Large                                           18.9ns ± 0%             19.3ns ± 0%   +1.92%        (p=0.000 n=10+10)
BM_StringCopy_Huge                                             309ns ± 1%              316ns ± 5%     ~            (p=0.633 n=8+10)

Patch from Martijn Vels (mvels@google.com)
Reviewed as D72160.
This commit is contained in:
Eric Fiselier 2020-01-17 16:53:47 -05:00
parent cd40bd0a32
commit a8a9c8e0a1
1 changed files with 19 additions and 2 deletions

View File

@ -1549,6 +1549,11 @@ private:
inline
void __init(size_type __n, value_type __c);
// Slow path for the (inlined) copy constructor: always allocates long
// storage, copies __sz characters from __s into it and writes a
// terminating value_type() after them.
// Identical to __init(s, sz), except that this function is always
// externally instantiated and not inlined.
// NOTE(review): the definition performs no length check of its own; the
// visible callers pass the size of an existing long string -- confirm
// before calling it from anywhere else.
void __init_long_external(const value_type* __s, size_type __sz);
template <class _InputIterator>
inline
_EnableIf
@ -1797,6 +1802,18 @@ basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_ty
traits_type::assign(__p[__sz], value_type());
}
// Out-of-line slow path used by the copy constructors when the source string
// is in its long representation. Always allocates: the new string is put into
// the long representation regardless of __sz.
//
//   __s   source characters; exactly __sz of them are copied.
//   __sz  number of characters to copy (terminator not included).
template <class _CharT, class _Traits, class _Allocator>
void basic_string<_CharT, _Traits, _Allocator>::__init_long_external(
    const _CharT* __s, size_type __sz) {
  // One extra slot beyond the recommended capacity holds the terminator.
  const size_type __alloc_count = __recommend(__sz) + 1;
  pointer __buf = __alloc_traits::allocate(__alloc(), __alloc_count);

  // Publish the long representation: data pointer, capacity, then size.
  __set_long_pointer(__buf);
  __set_long_cap(__alloc_count);
  __set_long_size(__sz);

  // Copy the payload and terminate it with a value-initialized character.
  traits_type::copy(_VSTD::__to_address(__buf), __s, __sz);
  traits_type::assign(__buf[__sz], value_type());
}
template <class _CharT, class _Traits, class _Allocator>
template <class>
basic_string<_CharT, _Traits, _Allocator>::basic_string(const _CharT* __s, const _Allocator& __a)
@ -1840,7 +1857,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(const basic_string& __st
if (!__str.__is_long())
__r_.first().__r = __str.__r_.first().__r;
else
__init(_VSTD::__to_address(__str.__get_long_pointer()), __str.__get_long_size());
__init_long_external(_VSTD::__to_address(__str.__get_long_pointer()), __str.__get_long_size());
#if _LIBCPP_DEBUG_LEVEL >= 2
__get_db()->__insert_c(this);
#endif
@ -1854,7 +1871,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(
if (!__str.__is_long())
__r_.first().__r = __str.__r_.first().__r;
else
__init(_VSTD::__to_address(__str.__get_long_pointer()), __str.__get_long_size());
__init_long_external(_VSTD::__to_address(__str.__get_long_pointer()), __str.__get_long_size());
#if _LIBCPP_DEBUG_LEVEL >= 2
__get_db()->__insert_c(this);
#endif