forked from OSchip/llvm-project
[libc++] Use bit field for checking if string is in long or short mode
This makes the code a bit simpler and (I think) removes the undefined behaviour from the normal string layout. Reviewed By: ldionne, Mordante, #libc Spies: labath, dblaikie, JDevlieghere, krytarowski, jgorbe, jingham, saugustine, arichardson, libcxx-commits Differential Revision: https://reviews.llvm.org/D123580
This commit is contained in:
parent
1056c56786
commit
29c8c070a1
|
@ -532,6 +532,8 @@ basic_string<char32_t> operator "" s( const char32_t *str, size_t len ); // C++1
|
||||||
#include <__utility/auto_cast.h>
|
#include <__utility/auto_cast.h>
|
||||||
#include <__utility/move.h>
|
#include <__utility/move.h>
|
||||||
#include <__utility/swap.h>
|
#include <__utility/swap.h>
|
||||||
|
#include <__utility/unreachable.h>
|
||||||
|
#include <climits>
|
||||||
#include <compare>
|
#include <compare>
|
||||||
#include <cstdio> // EOF
|
#include <cstdio> // EOF
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
@ -539,6 +541,7 @@ basic_string<char32_t> operator "" s( const char32_t *str, size_t len ); // C++1
|
||||||
#include <initializer_list>
|
#include <initializer_list>
|
||||||
#include <iosfwd>
|
#include <iosfwd>
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
|
#include <limits>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
|
@ -674,17 +677,10 @@ private:
|
||||||
{
|
{
|
||||||
pointer __data_;
|
pointer __data_;
|
||||||
size_type __size_;
|
size_type __size_;
|
||||||
size_type __cap_;
|
size_type __cap_ : sizeof(size_type) * CHAR_BIT - 1;
|
||||||
|
size_type __is_long_ : 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef _LIBCPP_BIG_ENDIAN
|
|
||||||
static const size_type __short_mask = 0x01;
|
|
||||||
static const size_type __long_mask = 0x1ul;
|
|
||||||
#else // _LIBCPP_BIG_ENDIAN
|
|
||||||
static const size_type __short_mask = 0x80;
|
|
||||||
static const size_type __long_mask = ~(size_type(~0) >> 1);
|
|
||||||
#endif // _LIBCPP_BIG_ENDIAN
|
|
||||||
|
|
||||||
enum {__min_cap = (sizeof(__long) - 1)/sizeof(value_type) > 2 ?
|
enum {__min_cap = (sizeof(__long) - 1)/sizeof(value_type) > 2 ?
|
||||||
(sizeof(__long) - 1)/sizeof(value_type) : 2};
|
(sizeof(__long) - 1)/sizeof(value_type) : 2};
|
||||||
|
|
||||||
|
@ -692,26 +688,46 @@ private:
|
||||||
{
|
{
|
||||||
value_type __data_[__min_cap];
|
value_type __data_[__min_cap];
|
||||||
unsigned char __padding[sizeof(value_type) - 1];
|
unsigned char __padding[sizeof(value_type) - 1];
|
||||||
unsigned char __size_;
|
unsigned char __size_ : 7;
|
||||||
|
unsigned char __is_long_ : 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// The __endian_factor is required because the field we use to store the size
|
||||||
|
// (either size_type or unsigned char depending on long/short) has one fewer
|
||||||
|
// bit than it would if it were not a bitfield.
|
||||||
|
//
|
||||||
|
// If the LSB is used to store the short-flag in the short string representation,
|
||||||
|
// we have to multiply the size by two when it is stored and divide it by two when
|
||||||
|
// it is loaded to make sure that we always store an even number. In the long string
|
||||||
|
// representation, we can ignore this because we can assume that we always allocate
|
||||||
|
// an even amount of value_types.
|
||||||
|
//
|
||||||
|
// If the MSB is used for the short-flag, the max_size() is numeric_limits<size_type>::max() / 2.
|
||||||
|
// This does not impact the short string representation, since we never need the MSB
|
||||||
|
// for representing the size of a short string anyway.
|
||||||
|
|
||||||
|
#ifdef _LIBCPP_BIG_ENDIAN
|
||||||
|
static const size_type __endian_factor = 2;
|
||||||
#else
|
#else
|
||||||
|
static const size_type __endian_factor = 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#else // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
|
||||||
|
|
||||||
|
#ifdef _LIBCPP_BIG_ENDIAN
|
||||||
|
static const size_type __endian_factor = 1;
|
||||||
|
#else
|
||||||
|
static const size_type __endian_factor = 2;
|
||||||
|
#endif
|
||||||
|
|
||||||
struct __long
|
struct __long
|
||||||
{
|
{
|
||||||
size_type __cap_;
|
size_type __is_long_ : 1;
|
||||||
|
size_type __cap_ : sizeof(size_type) * CHAR_BIT - 1;
|
||||||
size_type __size_;
|
size_type __size_;
|
||||||
pointer __data_;
|
pointer __data_;
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef _LIBCPP_BIG_ENDIAN
|
|
||||||
static const size_type __short_mask = 0x80;
|
|
||||||
static const size_type __long_mask = ~(size_type(~0) >> 1);
|
|
||||||
#else // _LIBCPP_BIG_ENDIAN
|
|
||||||
static const size_type __short_mask = 0x01;
|
|
||||||
static const size_type __long_mask = 0x1ul;
|
|
||||||
#endif // _LIBCPP_BIG_ENDIAN
|
|
||||||
|
|
||||||
enum {__min_cap = (sizeof(__long) - 1)/sizeof(value_type) > 2 ?
|
enum {__min_cap = (sizeof(__long) - 1)/sizeof(value_type) > 2 ?
|
||||||
(sizeof(__long) - 1)/sizeof(value_type) : 2};
|
(sizeof(__long) - 1)/sizeof(value_type) : 2};
|
||||||
|
|
||||||
|
@ -719,7 +735,10 @@ private:
|
||||||
{
|
{
|
||||||
union
|
union
|
||||||
{
|
{
|
||||||
unsigned char __size_;
|
struct {
|
||||||
|
unsigned char __is_long_ : 1;
|
||||||
|
unsigned char __size_ : 7;
|
||||||
|
};
|
||||||
value_type __lx;
|
value_type __lx;
|
||||||
};
|
};
|
||||||
value_type __data_[__min_cap];
|
value_type __data_[__min_cap];
|
||||||
|
@ -1426,8 +1445,9 @@ public:
|
||||||
_LIBCPP_INLINE_VISIBILITY void __shrink_or_extend(size_type __target_capacity);
|
_LIBCPP_INLINE_VISIBILITY void __shrink_or_extend(size_type __target_capacity);
|
||||||
|
|
||||||
_LIBCPP_INLINE_VISIBILITY
|
_LIBCPP_INLINE_VISIBILITY
|
||||||
bool __is_long() const _NOEXCEPT
|
bool __is_long() const _NOEXCEPT {
|
||||||
{return bool(__r_.first().__s.__size_ & __short_mask);}
|
return __r_.first().__s.__is_long_;
|
||||||
|
}
|
||||||
|
|
||||||
#if _LIBCPP_DEBUG_LEVEL == 2
|
#if _LIBCPP_DEBUG_LEVEL == 2
|
||||||
|
|
||||||
|
@ -1474,43 +1494,18 @@ private:
|
||||||
_LIBCPP_HIDE_FROM_ABI allocator_type& __alloc() _NOEXCEPT { return __r_.second(); }
|
_LIBCPP_HIDE_FROM_ABI allocator_type& __alloc() _NOEXCEPT { return __r_.second(); }
|
||||||
_LIBCPP_HIDE_FROM_ABI const allocator_type& __alloc() const _NOEXCEPT { return __r_.second(); }
|
_LIBCPP_HIDE_FROM_ABI const allocator_type& __alloc() const _NOEXCEPT { return __r_.second(); }
|
||||||
|
|
||||||
#ifdef _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
|
_LIBCPP_INLINE_VISIBILITY
|
||||||
|
void __set_short_size(size_type __s) _NOEXCEPT {
|
||||||
|
_LIBCPP_ASSERT(__s < __min_cap, "__s should never be greater than or equal to the short string capacity");
|
||||||
|
__r_.first().__s.__size_ = __s;
|
||||||
|
__r_.first().__s.__is_long_ = false;
|
||||||
|
}
|
||||||
|
|
||||||
_LIBCPP_INLINE_VISIBILITY
|
_LIBCPP_INLINE_VISIBILITY
|
||||||
void __set_short_size(size_type __s) _NOEXCEPT
|
size_type __get_short_size() const _NOEXCEPT {
|
||||||
# ifdef _LIBCPP_BIG_ENDIAN
|
_LIBCPP_ASSERT(!__r_.first().__s.__is_long_, "String has to be short when trying to get the short size");
|
||||||
{__r_.first().__s.__size_ = (unsigned char)(__s << 1);}
|
return __r_.first().__s.__size_;
|
||||||
# else
|
}
|
||||||
{__r_.first().__s.__size_ = (unsigned char)(__s);}
|
|
||||||
# endif
|
|
||||||
|
|
||||||
_LIBCPP_INLINE_VISIBILITY
|
|
||||||
size_type __get_short_size() const _NOEXCEPT
|
|
||||||
# ifdef _LIBCPP_BIG_ENDIAN
|
|
||||||
{return __r_.first().__s.__size_ >> 1;}
|
|
||||||
# else
|
|
||||||
{return __r_.first().__s.__size_;}
|
|
||||||
# endif
|
|
||||||
|
|
||||||
#else // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
|
|
||||||
|
|
||||||
_LIBCPP_INLINE_VISIBILITY
|
|
||||||
void __set_short_size(size_type __s) _NOEXCEPT
|
|
||||||
# ifdef _LIBCPP_BIG_ENDIAN
|
|
||||||
{__r_.first().__s.__size_ = (unsigned char)(__s);}
|
|
||||||
# else
|
|
||||||
{__r_.first().__s.__size_ = (unsigned char)(__s << 1);}
|
|
||||||
# endif
|
|
||||||
|
|
||||||
_LIBCPP_INLINE_VISIBILITY
|
|
||||||
size_type __get_short_size() const _NOEXCEPT
|
|
||||||
# ifdef _LIBCPP_BIG_ENDIAN
|
|
||||||
{return __r_.first().__s.__size_;}
|
|
||||||
# else
|
|
||||||
{return __r_.first().__s.__size_ >> 1;}
|
|
||||||
# endif
|
|
||||||
|
|
||||||
#endif // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
|
|
||||||
|
|
||||||
_LIBCPP_INLINE_VISIBILITY
|
_LIBCPP_INLINE_VISIBILITY
|
||||||
void __set_long_size(size_type __s) _NOEXCEPT
|
void __set_long_size(size_type __s) _NOEXCEPT
|
||||||
|
@ -1523,11 +1518,15 @@ private:
|
||||||
{if (__is_long()) __set_long_size(__s); else __set_short_size(__s);}
|
{if (__is_long()) __set_long_size(__s); else __set_short_size(__s);}
|
||||||
|
|
||||||
_LIBCPP_INLINE_VISIBILITY
|
_LIBCPP_INLINE_VISIBILITY
|
||||||
void __set_long_cap(size_type __s) _NOEXCEPT
|
void __set_long_cap(size_type __s) _NOEXCEPT {
|
||||||
{__r_.first().__l.__cap_ = __long_mask | __s;}
|
__r_.first().__l.__cap_ = __s / __endian_factor;
|
||||||
|
__r_.first().__l.__is_long_ = true;
|
||||||
|
}
|
||||||
|
|
||||||
_LIBCPP_INLINE_VISIBILITY
|
_LIBCPP_INLINE_VISIBILITY
|
||||||
size_type __get_long_cap() const _NOEXCEPT
|
size_type __get_long_cap() const _NOEXCEPT {
|
||||||
{return __r_.first().__l.__cap_ & size_type(~__long_mask);}
|
return __r_.first().__l.__cap_ * __endian_factor;
|
||||||
|
}
|
||||||
|
|
||||||
_LIBCPP_INLINE_VISIBILITY
|
_LIBCPP_INLINE_VISIBILITY
|
||||||
void __set_long_pointer(pointer __p) _NOEXCEPT
|
void __set_long_pointer(pointer __p) _NOEXCEPT
|
||||||
|
@ -3225,11 +3224,12 @@ typename basic_string<_CharT, _Traits, _Allocator>::size_type
|
||||||
basic_string<_CharT, _Traits, _Allocator>::max_size() const _NOEXCEPT
|
basic_string<_CharT, _Traits, _Allocator>::max_size() const _NOEXCEPT
|
||||||
{
|
{
|
||||||
size_type __m = __alloc_traits::max_size(__alloc());
|
size_type __m = __alloc_traits::max_size(__alloc());
|
||||||
#ifdef _LIBCPP_BIG_ENDIAN
|
if (__m <= std::numeric_limits<size_type>::max() / 2) {
|
||||||
return (__m <= ~__long_mask ? __m : __m/2) - __alignment;
|
return __m - __alignment;
|
||||||
#else
|
} else {
|
||||||
return __m - __alignment;
|
bool __uses_lsb = __endian_factor == 2;
|
||||||
#endif
|
return __uses_lsb ? __m - __alignment : (__m / 2) - __alignment;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class _CharT, class _Traits, class _Allocator>
|
template <class _CharT, class _Traits, class _Allocator>
|
||||||
|
|
|
@ -0,0 +1,125 @@
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// UNSUPPORTED:
|
||||||
|
|
||||||
|
// <string>
|
||||||
|
|
||||||
|
// This test ensures that the correct max_size() is returned depending on the platform.
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cassert>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "test_macros.h"
|
||||||
|
|
||||||
|
// alignment of the string heap buffer is hardcoded to 16
|
||||||
|
static const size_t alignment = 16;
|
||||||
|
|
||||||
|
void full_size() {
|
||||||
|
std::string str;
|
||||||
|
assert(str.max_size() == std::numeric_limits<size_t>::max() - alignment);
|
||||||
|
|
||||||
|
#ifndef TEST_HAS_NO_CHAR8_T
|
||||||
|
std::u8string u8str;
|
||||||
|
assert(u8str.max_size() == std::numeric_limits<size_t>::max() - alignment);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
|
||||||
|
std::wstring wstr;
|
||||||
|
assert(wstr.max_size() == std::numeric_limits<size_t>::max() / sizeof(wchar_t) - alignment);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef TEST_HAS_NO_UNICODE_CHARS
|
||||||
|
std::u16string u16str;
|
||||||
|
std::u32string u32str;
|
||||||
|
assert(u16str.max_size() == std::numeric_limits<size_t>::max() / 2 - alignment);
|
||||||
|
assert(u32str.max_size() == std::numeric_limits<size_t>::max() / 4 - alignment);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void half_size() {
|
||||||
|
std::string str;
|
||||||
|
assert(str.max_size() == std::numeric_limits<size_t>::max() / 2 - alignment);
|
||||||
|
|
||||||
|
#ifndef TEST_HAS_NO_CHAR8_T
|
||||||
|
std::u8string u8str;
|
||||||
|
assert(u8str.max_size() == std::numeric_limits<size_t>::max() / 2 - alignment);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
|
||||||
|
std::wstring wstr;
|
||||||
|
assert(wstr.max_size() == std::numeric_limits<size_t>::max() / std::max<size_t>(2ul, sizeof(wchar_t)) - alignment);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef TEST_HAS_NO_UNICODE_CHARS
|
||||||
|
std::u16string u16str;
|
||||||
|
std::u32string u32str;
|
||||||
|
assert(u16str.max_size() == std::numeric_limits<size_t>::max() / 2 - alignment);
|
||||||
|
assert(u32str.max_size() == std::numeric_limits<size_t>::max() / 4 - alignment);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
bool test() {
|
||||||
|
#if _LIBCPP_ABI_VERSION == 1
|
||||||
|
#ifdef __APPLE__
|
||||||
|
|
||||||
|
# ifdef __aarch64__
|
||||||
|
half_size();
|
||||||
|
# elif defined(__x86_64__)
|
||||||
|
full_size();
|
||||||
|
# else
|
||||||
|
# error "Your target system seems to be unsupported."
|
||||||
|
# endif
|
||||||
|
|
||||||
|
#elif defined(__linux__)
|
||||||
|
|
||||||
|
# ifdef __x86_64__
|
||||||
|
full_size();
|
||||||
|
# elif defined(__arm__) || defined(__aarch64__)
|
||||||
|
# ifdef __BIG_ENDIAN__
|
||||||
|
half_size();
|
||||||
|
# else
|
||||||
|
full_size();
|
||||||
|
# endif
|
||||||
|
|
||||||
|
# else
|
||||||
|
# error "Your target system seems to be unsupported."
|
||||||
|
# endif
|
||||||
|
|
||||||
|
#elif defined(__powerpc__)
|
||||||
|
half_size();
|
||||||
|
#elif defined(__powepc64__)
|
||||||
|
half_size();
|
||||||
|
|
||||||
|
#elif defined(_WIN64)
|
||||||
|
|
||||||
|
# ifdef __x86_64__
|
||||||
|
full_size();
|
||||||
|
# else
|
||||||
|
# error "Your target system seems to be unsupported."
|
||||||
|
# endif
|
||||||
|
|
||||||
|
#else
|
||||||
|
# error "Your target system seems to be unsupported."
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int, char**) {
|
||||||
|
test();
|
||||||
|
#if TEST_STD_VER > 17
|
||||||
|
// static_assert(test());
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -9,6 +9,11 @@
|
||||||
// UNSUPPORTED: no-exceptions
|
// UNSUPPORTED: no-exceptions
|
||||||
// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}}
|
// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}}
|
||||||
|
|
||||||
|
// Prior to http://llvm.org/D123580, there was a bug with how the max_size()
|
||||||
|
// was calculated. That was inlined into some functions in the dylib, which leads
|
||||||
|
// to failures when running this test against an older system dylib.
|
||||||
|
// XFAIL: use_system_cxx_lib && target=arm64-apple-macosx{{11.0|12.0}}
|
||||||
|
|
||||||
// <string>
|
// <string>
|
||||||
|
|
||||||
// size_type max_size() const;
|
// size_type max_size() const;
|
||||||
|
|
|
@ -192,26 +192,6 @@ def _value_of_pair_first(value):
|
||||||
class StdStringPrinter(object):
|
class StdStringPrinter(object):
|
||||||
"""Print a std::string."""
|
"""Print a std::string."""
|
||||||
|
|
||||||
def _get_short_size(self, short_field, short_size):
|
|
||||||
"""Short size depends on both endianness and a compile-time define."""
|
|
||||||
|
|
||||||
# If the padding field is present after all this indirection, then string
|
|
||||||
# was compiled with _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT defined.
|
|
||||||
field = short_field.type.fields()[1].type.fields()[0]
|
|
||||||
libcpp_abi_alternate_string_layout = field.name and "__padding" in field.name
|
|
||||||
|
|
||||||
# This logical structure closely follows the original code (which is clearer
|
|
||||||
# in C++). Keep them parallel to make them easier to compare.
|
|
||||||
if libcpp_abi_alternate_string_layout:
|
|
||||||
if _libcpp_big_endian:
|
|
||||||
return short_size >> 1
|
|
||||||
else:
|
|
||||||
return short_size
|
|
||||||
elif _libcpp_big_endian:
|
|
||||||
return short_size
|
|
||||||
else:
|
|
||||||
return short_size >> 1
|
|
||||||
|
|
||||||
def __init__(self, val):
|
def __init__(self, val):
|
||||||
self.val = val
|
self.val = val
|
||||||
|
|
||||||
|
@ -223,18 +203,13 @@ class StdStringPrinter(object):
|
||||||
short_size = short_field["__size_"]
|
short_size = short_field["__size_"]
|
||||||
if short_size == 0:
|
if short_size == 0:
|
||||||
return ""
|
return ""
|
||||||
short_mask = self.val["__short_mask"]
|
if short_field["__is_long_"]:
|
||||||
# Counter intuitive to compare the size and short_mask to see if the string
|
|
||||||
# is long, but that's the way the implementation does it. Note that
|
|
||||||
# __is_long() doesn't use get_short_size in C++.
|
|
||||||
is_long = short_size & short_mask
|
|
||||||
if is_long:
|
|
||||||
long_field = value_field["__l"]
|
long_field = value_field["__l"]
|
||||||
data = long_field["__data_"]
|
data = long_field["__data_"]
|
||||||
size = long_field["__size_"]
|
size = long_field["__size_"]
|
||||||
else:
|
else:
|
||||||
data = short_field["__data_"]
|
data = short_field["__data_"]
|
||||||
size = self._get_short_size(short_field, short_size)
|
size = short_field["__size_"]
|
||||||
return data.lazy_string(length=size)
|
return data.lazy_string(length=size)
|
||||||
|
|
||||||
def display_hint(self):
|
def display_hint(self):
|
||||||
|
|
Loading…
Reference in New Issue