forked from OSchip/llvm-project
[libc++] Use bit field for checking if string is in long or short mode
This makes the code a bit simpler and (I think) removes the undefined behaviour from the normal string layout. Reviewed By: ldionne, Mordante, #libc Spies: labath, dblaikie, JDevlieghere, krytarowski, jgorbe, jingham, saugustine, arichardson, libcxx-commits Differential Revision: https://reviews.llvm.org/D123580
This commit is contained in:
parent
1056c56786
commit
29c8c070a1
|
@ -532,6 +532,8 @@ basic_string<char32_t> operator "" s( const char32_t *str, size_t len ); // C++1
|
|||
#include <__utility/auto_cast.h>
|
||||
#include <__utility/move.h>
|
||||
#include <__utility/swap.h>
|
||||
#include <__utility/unreachable.h>
|
||||
#include <climits>
|
||||
#include <compare>
|
||||
#include <cstdio> // EOF
|
||||
#include <cstdlib>
|
||||
|
@ -539,6 +541,7 @@ basic_string<char32_t> operator "" s( const char32_t *str, size_t len ); // C++1
|
|||
#include <initializer_list>
|
||||
#include <iosfwd>
|
||||
#include <iterator>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <string_view>
|
||||
|
@ -674,17 +677,10 @@ private:
|
|||
{
|
||||
pointer __data_;
|
||||
size_type __size_;
|
||||
size_type __cap_;
|
||||
size_type __cap_ : sizeof(size_type) * CHAR_BIT - 1;
|
||||
size_type __is_long_ : 1;
|
||||
};
|
||||
|
||||
#ifdef _LIBCPP_BIG_ENDIAN
|
||||
static const size_type __short_mask = 0x01;
|
||||
static const size_type __long_mask = 0x1ul;
|
||||
#else // _LIBCPP_BIG_ENDIAN
|
||||
static const size_type __short_mask = 0x80;
|
||||
static const size_type __long_mask = ~(size_type(~0) >> 1);
|
||||
#endif // _LIBCPP_BIG_ENDIAN
|
||||
|
||||
enum {__min_cap = (sizeof(__long) - 1)/sizeof(value_type) > 2 ?
|
||||
(sizeof(__long) - 1)/sizeof(value_type) : 2};
|
||||
|
||||
|
@ -692,26 +688,46 @@ private:
|
|||
{
|
||||
value_type __data_[__min_cap];
|
||||
unsigned char __padding[sizeof(value_type) - 1];
|
||||
unsigned char __size_;
|
||||
unsigned char __size_ : 7;
|
||||
unsigned char __is_long_ : 1;
|
||||
};
|
||||
|
||||
// The __endian_factor is required because the field we use to store the size
|
||||
// (either size_type or unsigned char depending on long/short) has one fewer
|
||||
// bit than it would if it were not a bitfield.
|
||||
//
|
||||
// If the LSB is used to store the short-flag in the short string representation,
|
||||
// we have to multiply the size by two when it is stored and divide it by two when
|
||||
// it is loaded to make sure that we always store an even number. In the long string
|
||||
// representation, we can ignore this because we can assume that we always allocate
|
||||
// an even amount of value_types.
|
||||
//
|
||||
// If the MSB is used for the short-flag, the max_size() is numeric_limits<size_type>::max() / 2.
|
||||
// This does not impact the short string representation, since we never need the MSB
|
||||
// for representing the size of a short string anyway.
|
||||
|
||||
#ifdef _LIBCPP_BIG_ENDIAN
|
||||
static const size_type __endian_factor = 2;
|
||||
#else
|
||||
static const size_type __endian_factor = 1;
|
||||
#endif
|
||||
|
||||
#else // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
|
||||
|
||||
#ifdef _LIBCPP_BIG_ENDIAN
|
||||
static const size_type __endian_factor = 1;
|
||||
#else
|
||||
static const size_type __endian_factor = 2;
|
||||
#endif
|
||||
|
||||
struct __long
|
||||
{
|
||||
size_type __cap_;
|
||||
size_type __is_long_ : 1;
|
||||
size_type __cap_ : sizeof(size_type) * CHAR_BIT - 1;
|
||||
size_type __size_;
|
||||
pointer __data_;
|
||||
};
|
||||
|
||||
#ifdef _LIBCPP_BIG_ENDIAN
|
||||
static const size_type __short_mask = 0x80;
|
||||
static const size_type __long_mask = ~(size_type(~0) >> 1);
|
||||
#else // _LIBCPP_BIG_ENDIAN
|
||||
static const size_type __short_mask = 0x01;
|
||||
static const size_type __long_mask = 0x1ul;
|
||||
#endif // _LIBCPP_BIG_ENDIAN
|
||||
|
||||
enum {__min_cap = (sizeof(__long) - 1)/sizeof(value_type) > 2 ?
|
||||
(sizeof(__long) - 1)/sizeof(value_type) : 2};
|
||||
|
||||
|
@ -719,7 +735,10 @@ private:
|
|||
{
|
||||
union
|
||||
{
|
||||
unsigned char __size_;
|
||||
struct {
|
||||
unsigned char __is_long_ : 1;
|
||||
unsigned char __size_ : 7;
|
||||
};
|
||||
value_type __lx;
|
||||
};
|
||||
value_type __data_[__min_cap];
|
||||
|
@ -1426,8 +1445,9 @@ public:
|
|||
_LIBCPP_INLINE_VISIBILITY void __shrink_or_extend(size_type __target_capacity);
|
||||
|
||||
_LIBCPP_INLINE_VISIBILITY
|
||||
bool __is_long() const _NOEXCEPT
|
||||
{return bool(__r_.first().__s.__size_ & __short_mask);}
|
||||
bool __is_long() const _NOEXCEPT {
|
||||
return __r_.first().__s.__is_long_;
|
||||
}
|
||||
|
||||
#if _LIBCPP_DEBUG_LEVEL == 2
|
||||
|
||||
|
@ -1474,43 +1494,18 @@ private:
|
|||
_LIBCPP_HIDE_FROM_ABI allocator_type& __alloc() _NOEXCEPT { return __r_.second(); }
|
||||
_LIBCPP_HIDE_FROM_ABI const allocator_type& __alloc() const _NOEXCEPT { return __r_.second(); }
|
||||
|
||||
#ifdef _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
|
||||
_LIBCPP_INLINE_VISIBILITY
|
||||
void __set_short_size(size_type __s) _NOEXCEPT {
|
||||
_LIBCPP_ASSERT(__s < __min_cap, "__s should never be greater than or equal to the short string capacity");
|
||||
__r_.first().__s.__size_ = __s;
|
||||
__r_.first().__s.__is_long_ = false;
|
||||
}
|
||||
|
||||
_LIBCPP_INLINE_VISIBILITY
|
||||
void __set_short_size(size_type __s) _NOEXCEPT
|
||||
# ifdef _LIBCPP_BIG_ENDIAN
|
||||
{__r_.first().__s.__size_ = (unsigned char)(__s << 1);}
|
||||
# else
|
||||
{__r_.first().__s.__size_ = (unsigned char)(__s);}
|
||||
# endif
|
||||
|
||||
_LIBCPP_INLINE_VISIBILITY
|
||||
size_type __get_short_size() const _NOEXCEPT
|
||||
# ifdef _LIBCPP_BIG_ENDIAN
|
||||
{return __r_.first().__s.__size_ >> 1;}
|
||||
# else
|
||||
{return __r_.first().__s.__size_;}
|
||||
# endif
|
||||
|
||||
#else // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
|
||||
|
||||
_LIBCPP_INLINE_VISIBILITY
|
||||
void __set_short_size(size_type __s) _NOEXCEPT
|
||||
# ifdef _LIBCPP_BIG_ENDIAN
|
||||
{__r_.first().__s.__size_ = (unsigned char)(__s);}
|
||||
# else
|
||||
{__r_.first().__s.__size_ = (unsigned char)(__s << 1);}
|
||||
# endif
|
||||
|
||||
_LIBCPP_INLINE_VISIBILITY
|
||||
size_type __get_short_size() const _NOEXCEPT
|
||||
# ifdef _LIBCPP_BIG_ENDIAN
|
||||
{return __r_.first().__s.__size_;}
|
||||
# else
|
||||
{return __r_.first().__s.__size_ >> 1;}
|
||||
# endif
|
||||
|
||||
#endif // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
|
||||
size_type __get_short_size() const _NOEXCEPT {
|
||||
_LIBCPP_ASSERT(!__r_.first().__s.__is_long_, "String has to be short when trying to get the short size");
|
||||
return __r_.first().__s.__size_;
|
||||
}
|
||||
|
||||
_LIBCPP_INLINE_VISIBILITY
|
||||
void __set_long_size(size_type __s) _NOEXCEPT
|
||||
|
@ -1523,11 +1518,15 @@ private:
|
|||
{if (__is_long()) __set_long_size(__s); else __set_short_size(__s);}
|
||||
|
||||
_LIBCPP_INLINE_VISIBILITY
|
||||
void __set_long_cap(size_type __s) _NOEXCEPT
|
||||
{__r_.first().__l.__cap_ = __long_mask | __s;}
|
||||
void __set_long_cap(size_type __s) _NOEXCEPT {
|
||||
__r_.first().__l.__cap_ = __s / __endian_factor;
|
||||
__r_.first().__l.__is_long_ = true;
|
||||
}
|
||||
|
||||
_LIBCPP_INLINE_VISIBILITY
|
||||
size_type __get_long_cap() const _NOEXCEPT
|
||||
{return __r_.first().__l.__cap_ & size_type(~__long_mask);}
|
||||
size_type __get_long_cap() const _NOEXCEPT {
|
||||
return __r_.first().__l.__cap_ * __endian_factor;
|
||||
}
|
||||
|
||||
_LIBCPP_INLINE_VISIBILITY
|
||||
void __set_long_pointer(pointer __p) _NOEXCEPT
|
||||
|
@ -3225,11 +3224,12 @@ typename basic_string<_CharT, _Traits, _Allocator>::size_type
|
|||
basic_string<_CharT, _Traits, _Allocator>::max_size() const _NOEXCEPT
|
||||
{
|
||||
size_type __m = __alloc_traits::max_size(__alloc());
|
||||
#ifdef _LIBCPP_BIG_ENDIAN
|
||||
return (__m <= ~__long_mask ? __m : __m/2) - __alignment;
|
||||
#else
|
||||
return __m - __alignment;
|
||||
#endif
|
||||
if (__m <= std::numeric_limits<size_type>::max() / 2) {
|
||||
return __m - __alignment;
|
||||
} else {
|
||||
bool __uses_lsb = __endian_factor == 2;
|
||||
return __uses_lsb ? __m - __alignment : (__m / 2) - __alignment;
|
||||
}
|
||||
}
|
||||
|
||||
template <class _CharT, class _Traits, class _Allocator>
|
||||
|
|
|
@ -0,0 +1,125 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// UNSUPPORTED:
|
||||
|
||||
// <string>
|
||||
|
||||
// This test ensures that the correct max_size() is returned depending on the platform.
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <string>
|
||||
|
||||
#include "test_macros.h"
|
||||
|
||||
// alignment of the string heap buffer is hardcoded to 16
|
||||
static const size_t alignment = 16;
|
||||
|
||||
void full_size() {
|
||||
std::string str;
|
||||
assert(str.max_size() == std::numeric_limits<size_t>::max() - alignment);
|
||||
|
||||
#ifndef TEST_HAS_NO_CHAR8_T
|
||||
std::u8string u8str;
|
||||
assert(u8str.max_size() == std::numeric_limits<size_t>::max() - alignment);
|
||||
#endif
|
||||
|
||||
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
|
||||
std::wstring wstr;
|
||||
assert(wstr.max_size() == std::numeric_limits<size_t>::max() / sizeof(wchar_t) - alignment);
|
||||
#endif
|
||||
|
||||
#ifndef TEST_HAS_NO_UNICODE_CHARS
|
||||
std::u16string u16str;
|
||||
std::u32string u32str;
|
||||
assert(u16str.max_size() == std::numeric_limits<size_t>::max() / 2 - alignment);
|
||||
assert(u32str.max_size() == std::numeric_limits<size_t>::max() / 4 - alignment);
|
||||
#endif
|
||||
}
|
||||
|
||||
void half_size() {
|
||||
std::string str;
|
||||
assert(str.max_size() == std::numeric_limits<size_t>::max() / 2 - alignment);
|
||||
|
||||
#ifndef TEST_HAS_NO_CHAR8_T
|
||||
std::u8string u8str;
|
||||
assert(u8str.max_size() == std::numeric_limits<size_t>::max() / 2 - alignment);
|
||||
#endif
|
||||
|
||||
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
|
||||
std::wstring wstr;
|
||||
assert(wstr.max_size() == std::numeric_limits<size_t>::max() / std::max<size_t>(2ul, sizeof(wchar_t)) - alignment);
|
||||
#endif
|
||||
|
||||
#ifndef TEST_HAS_NO_UNICODE_CHARS
|
||||
std::u16string u16str;
|
||||
std::u32string u32str;
|
||||
assert(u16str.max_size() == std::numeric_limits<size_t>::max() / 2 - alignment);
|
||||
assert(u32str.max_size() == std::numeric_limits<size_t>::max() / 4 - alignment);
|
||||
#endif
|
||||
}
|
||||
|
||||
bool test() {
|
||||
#if _LIBCPP_ABI_VERSION == 1
|
||||
#ifdef __APPLE__
|
||||
|
||||
# ifdef __aarch64__
|
||||
half_size();
|
||||
# elif defined(__x86_64__)
|
||||
full_size();
|
||||
# else
|
||||
# error "Your target system seems to be unsupported."
|
||||
# endif
|
||||
|
||||
#elif defined(__linux__)
|
||||
|
||||
# ifdef __x86_64__
|
||||
full_size();
|
||||
# elif defined(__arm__) || defined(__aarch64__)
|
||||
# ifdef __BIG_ENDIAN__
|
||||
half_size();
|
||||
# else
|
||||
full_size();
|
||||
# endif
|
||||
|
||||
# else
|
||||
# error "Your target system seems to be unsupported."
|
||||
# endif
|
||||
|
||||
#elif defined(__powerpc__)
|
||||
half_size();
|
||||
#elif defined(__powepc64__)
|
||||
half_size();
|
||||
|
||||
#elif defined(_WIN64)
|
||||
|
||||
# ifdef __x86_64__
|
||||
full_size();
|
||||
# else
|
||||
# error "Your target system seems to be unsupported."
|
||||
# endif
|
||||
|
||||
#else
|
||||
# error "Your target system seems to be unsupported."
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int main(int, char**) {
|
||||
test();
|
||||
#if TEST_STD_VER > 17
|
||||
// static_assert(test());
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -9,6 +9,11 @@
|
|||
// UNSUPPORTED: no-exceptions
|
||||
// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}}
|
||||
|
||||
// Prior to http://llvm.org/D123580, there was a bug with how the max_size()
|
||||
// was calculated. That was inlined into some functions in the dylib, which leads
|
||||
// to failures when running this test against an older system dylib.
|
||||
// XFAIL: use_system_cxx_lib && target=arm64-apple-macosx{{11.0|12.0}}
|
||||
|
||||
// <string>
|
||||
|
||||
// size_type max_size() const;
|
||||
|
|
|
@ -192,26 +192,6 @@ def _value_of_pair_first(value):
|
|||
class StdStringPrinter(object):
|
||||
"""Print a std::string."""
|
||||
|
||||
def _get_short_size(self, short_field, short_size):
|
||||
"""Short size depends on both endianness and a compile-time define."""
|
||||
|
||||
# If the padding field is present after all this indirection, then string
|
||||
# was compiled with _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT defined.
|
||||
field = short_field.type.fields()[1].type.fields()[0]
|
||||
libcpp_abi_alternate_string_layout = field.name and "__padding" in field.name
|
||||
|
||||
# This logical structure closely follows the original code (which is clearer
|
||||
# in C++). Keep them parallel to make them easier to compare.
|
||||
if libcpp_abi_alternate_string_layout:
|
||||
if _libcpp_big_endian:
|
||||
return short_size >> 1
|
||||
else:
|
||||
return short_size
|
||||
elif _libcpp_big_endian:
|
||||
return short_size
|
||||
else:
|
||||
return short_size >> 1
|
||||
|
||||
def __init__(self, val):
|
||||
self.val = val
|
||||
|
||||
|
@ -223,18 +203,13 @@ class StdStringPrinter(object):
|
|||
short_size = short_field["__size_"]
|
||||
if short_size == 0:
|
||||
return ""
|
||||
short_mask = self.val["__short_mask"]
|
||||
# Counter intuitive to compare the size and short_mask to see if the string
|
||||
# is long, but that's the way the implementation does it. Note that
|
||||
# __is_long() doesn't use get_short_size in C++.
|
||||
is_long = short_size & short_mask
|
||||
if is_long:
|
||||
if short_field["__is_long_"]:
|
||||
long_field = value_field["__l"]
|
||||
data = long_field["__data_"]
|
||||
size = long_field["__size_"]
|
||||
else:
|
||||
data = short_field["__data_"]
|
||||
size = self._get_short_size(short_field, short_size)
|
||||
size = short_field["__size_"]
|
||||
return data.lazy_string(length=size)
|
||||
|
||||
def display_hint(self):
|
||||
|
|
Loading…
Reference in New Issue