[libc++] Add missing short wchar handling for codecvt_utf8, codecvt_utf16 and codecvt_utf8_utf16

Summary:
AIX has a 2-byte wchar_t in 32-bit mode and a 4-byte wchar_t in 64-bit mode.
This patch adds the missing short-wchar_t handling under the existing _LIBCPP_SHORT_WCHAR macro.

Marked test case ctor_move.pass.cpp as XFAIL for 32-bit mode on AIX because the UTF-8 constants it uses cannot be converted to a 2-byte wchar_t (by xingxue).

Authored by: jasonliu

Reviewed by: ldionne, zibi, SeanP, libc++

Differential Revision: https://reviews.llvm.org/D100777
This commit is contained in:
Xing Xue 2021-09-09 16:20:36 -04:00
parent af382b9383
commit f53fafbacb
7 changed files with 588 additions and 656 deletions

View File

@ -305,6 +305,11 @@
# endif
#endif // __sun__
#if defined(_AIX) && !defined(__64BIT__)
// wchar_t is 2 bytes wide in 32-bit mode on AIX.
# define _LIBCPP_SHORT_WCHAR 1
#endif
#if defined(__OpenBSD__)
// Certain architectures provide arc4random(). Prefer using
// arc4random() over /dev/{u,}random to make it possible to obtain

View File

@ -3485,15 +3485,25 @@ __codecvt_utf8<wchar_t>::do_length(state_type&,
{
const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
#if defined(_LIBCPP_SHORT_WCHAR)
return utf8_to_ucs2_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
#else
return utf8_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
#endif
}
// Reports the facet's max_length() value for the UTF-8 <-> wchar_t conversion.
// The figure depends on the width of wchar_t: builds with
// _LIBCPP_SHORT_WCHAR report 3, all others report 4. When the facet was
// instantiated with consume_header the figure is three higher (presumably to
// account for a leading byte-order mark).
int
__codecvt_utf8<wchar_t>::do_max_length() const noexcept
{
#if defined(_LIBCPP_SHORT_WCHAR)
    return (_Mode_ & consume_header) ? 6 : 3;
#else
    return (_Mode_ & consume_header) ? 7 : 4;
#endif
}
// __codecvt_utf8<char16_t>
@ -3653,14 +3663,25 @@ __codecvt_utf16<wchar_t, false>::do_out(state_type&,
const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
{
#if defined(_LIBCPP_SHORT_WCHAR)
const uint16_t* _frm = reinterpret_cast<const uint16_t*>(frm);
const uint16_t* _frm_end = reinterpret_cast<const uint16_t*>(frm_end);
const uint16_t* _frm_nxt = _frm;
#else
const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
const uint32_t* _frm_nxt = _frm;
#endif
uint8_t* _to = reinterpret_cast<uint8_t*>(to);
uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
uint8_t* _to_nxt = _to;
#if defined(_LIBCPP_SHORT_WCHAR)
result r = ucs2_to_utf16be(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
_Maxcode_, _Mode_);
#else
result r = ucs4_to_utf16be(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
_Maxcode_, _Mode_);
#endif
frm_nxt = frm + (_frm_nxt - _frm);
to_nxt = to + (_to_nxt - _to);
return r;
@ -3674,11 +3695,19 @@ __codecvt_utf16<wchar_t, false>::do_in(state_type&,
const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
const uint8_t* _frm_nxt = _frm;
#if defined(_LIBCPP_SHORT_WCHAR)
uint16_t* _to = reinterpret_cast<uint16_t*>(to);
uint16_t* _to_end = reinterpret_cast<uint16_t*>(to_end);
uint16_t* _to_nxt = _to;
result r = utf16be_to_ucs2(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
_Maxcode_, _Mode_);
#else
uint32_t* _to = reinterpret_cast<uint32_t*>(to);
uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
uint32_t* _to_nxt = _to;
result r = utf16be_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
_Maxcode_, _Mode_);
#endif
frm_nxt = frm + (_frm_nxt - _frm);
to_nxt = to + (_to_nxt - _to);
return r;
@ -3710,15 +3739,25 @@ __codecvt_utf16<wchar_t, false>::do_length(state_type&,
{
const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
#if defined(_LIBCPP_SHORT_WCHAR)
return utf16be_to_ucs2_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
#else
return utf16be_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
#endif
}
// Reports the facet's max_length() value for this UTF-16 <-> wchar_t facet
// (the `false` specialization, whose conversions call the *be helpers).
// Builds with _LIBCPP_SHORT_WCHAR report 2, all others report 4. When the
// facet was instantiated with consume_header the figure is two higher
// (presumably to account for a leading byte-order mark).
int
__codecvt_utf16<wchar_t, false>::do_max_length() const noexcept
{
#if defined(_LIBCPP_SHORT_WCHAR)
    return (_Mode_ & consume_header) ? 4 : 2;
#else
    return (_Mode_ & consume_header) ? 6 : 4;
#endif
}
// __codecvt_utf16<wchar_t, true>
@ -3728,14 +3767,25 @@ __codecvt_utf16<wchar_t, true>::do_out(state_type&,
const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
{
#if defined(_LIBCPP_SHORT_WCHAR)
const uint16_t* _frm = reinterpret_cast<const uint16_t*>(frm);
const uint16_t* _frm_end = reinterpret_cast<const uint16_t*>(frm_end);
const uint16_t* _frm_nxt = _frm;
#else
const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
const uint32_t* _frm_nxt = _frm;
#endif
uint8_t* _to = reinterpret_cast<uint8_t*>(to);
uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
uint8_t* _to_nxt = _to;
#if defined(_LIBCPP_SHORT_WCHAR)
result r = ucs2_to_utf16le(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
_Maxcode_, _Mode_);
#else
result r = ucs4_to_utf16le(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
_Maxcode_, _Mode_);
#endif
frm_nxt = frm + (_frm_nxt - _frm);
to_nxt = to + (_to_nxt - _to);
return r;
@ -3749,11 +3799,19 @@ __codecvt_utf16<wchar_t, true>::do_in(state_type&,
const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
const uint8_t* _frm_nxt = _frm;
#if defined(_LIBCPP_SHORT_WCHAR)
uint16_t* _to = reinterpret_cast<uint16_t*>(to);
uint16_t* _to_end = reinterpret_cast<uint16_t*>(to_end);
uint16_t* _to_nxt = _to;
result r = utf16le_to_ucs2(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
_Maxcode_, _Mode_);
#else
uint32_t* _to = reinterpret_cast<uint32_t*>(to);
uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
uint32_t* _to_nxt = _to;
result r = utf16le_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
_Maxcode_, _Mode_);
#endif
frm_nxt = frm + (_frm_nxt - _frm);
to_nxt = to + (_to_nxt - _to);
return r;
@ -3785,15 +3843,25 @@ __codecvt_utf16<wchar_t, true>::do_length(state_type&,
{
const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
#if defined(_LIBCPP_SHORT_WCHAR)
return utf16le_to_ucs2_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
#else
return utf16le_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
#endif
}
// Reports the facet's max_length() value for this UTF-16 <-> wchar_t facet
// (the `true` specialization, whose conversions call the *le helpers).
// Builds with _LIBCPP_SHORT_WCHAR report 2, all others report 4. When the
// facet was instantiated with consume_header the figure is two higher
// (presumably to account for a leading byte-order mark).
int
__codecvt_utf16<wchar_t, true>::do_max_length() const noexcept
{
#if defined(_LIBCPP_SHORT_WCHAR)
    return (_Mode_ & consume_header) ? 4 : 2;
#else
    return (_Mode_ & consume_header) ? 6 : 4;
#endif
}
// __codecvt_utf16<char16_t, false>
@ -4103,9 +4171,15 @@ __codecvt_utf8_utf16<wchar_t>::do_out(state_type&,
const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
{
#if defined(_LIBCPP_SHORT_WCHAR)
const uint16_t* _frm = reinterpret_cast<const uint16_t*>(frm);
const uint16_t* _frm_end = reinterpret_cast<const uint16_t*>(frm_end);
const uint16_t* _frm_nxt = _frm;
#else
const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
const uint32_t* _frm_nxt = _frm;
#endif
uint8_t* _to = reinterpret_cast<uint8_t*>(to);
uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
uint8_t* _to_nxt = _to;
@ -4124,9 +4198,15 @@ __codecvt_utf8_utf16<wchar_t>::do_in(state_type&,
const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
const uint8_t* _frm_nxt = _frm;
#if defined(_LIBCPP_SHORT_WCHAR)
uint16_t* _to = reinterpret_cast<uint16_t*>(to);
uint16_t* _to_end = reinterpret_cast<uint16_t*>(to_end);
uint16_t* _to_nxt = _to;
#else
uint32_t* _to = reinterpret_cast<uint32_t*>(to);
uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
uint32_t* _to_nxt = _to;
#endif
result r = utf8_to_utf16(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
_Maxcode_, _Mode_);
frm_nxt = frm + (_frm_nxt - _frm);

View File

@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
// 'do_bytes' throws a std::range_error unexpectedly
// XFAIL: LIBCXX-WINDOWS-FIXME
// XFAIL: LIBCXX-WINDOWS-FIXME, powerpc-ibm-aix
// UNSUPPORTED: c++03

View File

@ -24,310 +24,42 @@
#include "test_macros.h"
int main(int, char**)
{
{
typedef std::codecvt_utf16<wchar_t> C;
C c;
char n[4] = {char(0xD8), char(0xC0), char(0xDC), char(0x03)};
std::mbstate_t m;
int r = c.length(m, n, n+4, 2);
assert(r == 4);
template <class CharT, size_t = sizeof(CharT)>
struct TestHelper;
n[0] = char(0x10);
n[1] = char(0x05);
r = c.length(m, n, n+2, 2);
assert(r == 2);
template <class CharT>
struct TestHelper<CharT, 2> {
static void test();
};
n[0] = char(0x04);
n[1] = char(0x53);
r = c.length(m, n, n+2, 2);
assert(r == 2);
n[0] = char(0x00);
n[1] = char(0x56);
r = c.length(m, n, n+2, 2);
assert(r == 2);
}
{
typedef std::codecvt_utf16<wchar_t, 0x1000> C;
C c;
char n[4] = {char(0xD8), char(0xC0), char(0xDC), char(0x03)};
std::mbstate_t m;
int r = c.length(m, n, n+4, 2);
assert(r == 0);
n[0] = char(0x10);
n[1] = char(0x05);
r = c.length(m, n, n+2, 2);
assert(r == 0);
n[0] = char(0x04);
n[1] = char(0x53);
r = c.length(m, n, n+2, 2);
assert(r == 2);
n[0] = char(0x00);
n[1] = char(0x56);
r = c.length(m, n, n+2, 2);
assert(r == 2);
}
{
typedef std::codecvt_utf16<wchar_t, 0x10ffff, std::consume_header> C;
C c;
char n[6] = {char(0xFE), char(0xFF), char(0xD8), char(0xC0), char(0xDC), char(0x03)};
std::mbstate_t m;
int r = c.length(m, n, n+6, 2);
assert(r == 6);
n[0] = char(0x10);
n[1] = char(0x05);
r = c.length(m, n, n+2, 2);
assert(r == 2);
n[0] = char(0x04);
n[1] = char(0x53);
r = c.length(m, n, n+2, 2);
assert(r == 2);
n[0] = char(0x00);
n[1] = char(0x56);
r = c.length(m, n, n+2, 2);
assert(r == 2);
}
{
typedef std::codecvt_utf16<wchar_t, 0x10ffff, std::little_endian> C;
C c;
char n[4] = {char(0xC0), char(0xD8), char(0x03), char(0xDC)};
std::mbstate_t m;
int r = c.length(m, n, n+4, 2);
assert(r == 4);
n[1] = char(0x10);
n[0] = char(0x05);
r = c.length(m, n, n+2, 2);
assert(r == 2);
n[1] = char(0x04);
n[0] = char(0x53);
r = c.length(m, n, n+2, 2);
assert(r == 2);
n[1] = char(0x00);
n[0] = char(0x56);
r = c.length(m, n, n+2, 2);
assert(r == 2);
}
{
typedef std::codecvt_utf16<wchar_t, 0x1000, std::little_endian> C;
C c;
char n[4] = {char(0xC0), char(0xD8), char(0x03), char(0xDC)};
std::mbstate_t m;
int r = c.length(m, n, n+4, 2);
assert(r == 0);
n[1] = char(0x10);
n[0] = char(0x05);
r = c.length(m, n, n+2, 2);
assert(r == 0);
n[1] = char(0x04);
n[0] = char(0x53);
r = c.length(m, n, n+2, 2);
assert(r == 2);
n[1] = char(0x00);
n[0] = char(0x56);
r = c.length(m, n, n+2, 2);
assert(r == 2);
}
{
typedef std::codecvt_utf16<wchar_t, 0x10ffff, std::codecvt_mode(
std::consume_header |
std::little_endian)> C;
C c;
char n[6] = {char(0xFF), char(0xFE), char(0xC0), char(0xD8), char(0x03), char(0xDC)};
std::mbstate_t m;
int r = c.length(m, n, n+6, 2);
assert(r == 6);
n[1] = char(0x10);
n[0] = char(0x05);
r = c.length(m, n, n+2, 2);
assert(r == 2);
n[1] = char(0x04);
n[0] = char(0x53);
r = c.length(m, n, n+2, 2);
assert(r == 2);
n[1] = char(0x00);
n[0] = char(0x56);
r = c.length(m, n, n+2, 2);
assert(r == 2);
}
{
typedef std::codecvt_utf16<char32_t> C;
C c;
char n[4] = {char(0xD8), char(0xC0), char(0xDC), char(0x03)};
std::mbstate_t m;
int r = c.length(m, n, n+4, 2);
assert(r == 4);
n[0] = char(0x10);
n[1] = char(0x05);
r = c.length(m, n, n+2, 2);
assert(r == 2);
n[0] = char(0x04);
n[1] = char(0x53);
r = c.length(m, n, n+2, 2);
assert(r == 2);
n[0] = char(0x00);
n[1] = char(0x56);
r = c.length(m, n, n+2, 2);
assert(r == 2);
}
{
typedef std::codecvt_utf16<char32_t, 0x1000> C;
C c;
char n[4] = {char(0xD8), char(0xC0), char(0xDC), char(0x03)};
std::mbstate_t m;
int r = c.length(m, n, n+4, 2);
assert(r == 0);
n[0] = char(0x10);
n[1] = char(0x05);
r = c.length(m, n, n+2, 2);
assert(r == 0);
n[0] = char(0x04);
n[1] = char(0x53);
r = c.length(m, n, n+2, 2);
assert(r == 2);
n[0] = char(0x00);
n[1] = char(0x56);
r = c.length(m, n, n+2, 2);
assert(r == 2);
}
{
typedef std::codecvt_utf16<char32_t, 0x10ffff, std::consume_header> C;
C c;
char n[6] = {char(0xFE), char(0xFF), char(0xD8), char(0xC0), char(0xDC), char(0x03)};
std::mbstate_t m;
int r = c.length(m, n, n+6, 2);
assert(r == 6);
n[0] = char(0x10);
n[1] = char(0x05);
r = c.length(m, n, n+2, 2);
assert(r == 2);
n[0] = char(0x04);
n[1] = char(0x53);
r = c.length(m, n, n+2, 2);
assert(r == 2);
n[0] = char(0x00);
n[1] = char(0x56);
r = c.length(m, n, n+2, 2);
assert(r == 2);
}
{
typedef std::codecvt_utf16<char32_t, 0x10ffff, std::little_endian> C;
C c;
char n[4] = {char(0xC0), char(0xD8), char(0x03), char(0xDC)};
std::mbstate_t m;
int r = c.length(m, n, n+4, 2);
assert(r == 4);
n[1] = char(0x10);
n[0] = char(0x05);
r = c.length(m, n, n+2, 2);
assert(r == 2);
n[1] = char(0x04);
n[0] = char(0x53);
r = c.length(m, n, n+2, 2);
assert(r == 2);
n[1] = char(0x00);
n[0] = char(0x56);
r = c.length(m, n, n+2, 2);
assert(r == 2);
}
{
typedef std::codecvt_utf16<char32_t, 0x1000, std::little_endian> C;
C c;
char n[4] = {char(0xC0), char(0xD8), char(0x03), char(0xDC)};
std::mbstate_t m;
int r = c.length(m, n, n+4, 2);
assert(r == 0);
n[1] = char(0x10);
n[0] = char(0x05);
r = c.length(m, n, n+2, 2);
assert(r == 0);
n[1] = char(0x04);
n[0] = char(0x53);
r = c.length(m, n, n+2, 2);
assert(r == 2);
n[1] = char(0x00);
n[0] = char(0x56);
r = c.length(m, n, n+2, 2);
assert(r == 2);
}
{
typedef std::codecvt_utf16<char32_t, 0x10ffff, std::codecvt_mode(
std::consume_header |
std::little_endian)> C;
C c;
char n[6] = {char(0xFF), char(0xFE), char(0xC0), char(0xD8), char(0x03), char(0xDC)};
std::mbstate_t m;
int r = c.length(m, n, n+6, 2);
assert(r == 6);
n[1] = char(0x10);
n[0] = char(0x05);
r = c.length(m, n, n+2, 2);
assert(r == 2);
n[1] = char(0x04);
n[0] = char(0x53);
r = c.length(m, n, n+2, 2);
assert(r == 2);
n[1] = char(0x00);
n[0] = char(0x56);
r = c.length(m, n, n+2, 2);
assert(r == 2);
}
template <class CharT>
struct TestHelper<CharT, 4> {
static void test();
};
template <class CharT>
void TestHelper<CharT, 2>::test() {
{
typedef std::codecvt_utf16<char16_t> C;
C c;
char n[4] = {char(0xD8), char(0xC0), char(0xDC), char(0x03)};
std::mbstate_t m;
int r = c.length(m, n, n+4, 2);
int r = c.length(m, n, n + 4, 2);
assert(r == 0);
n[0] = char(0x10);
n[1] = char(0x05);
r = c.length(m, n, n+2, 2);
r = c.length(m, n, n + 2, 2);
assert(r == 2);
n[0] = char(0x04);
n[1] = char(0x53);
r = c.length(m, n, n+2, 2);
r = c.length(m, n, n + 2, 2);
assert(r == 2);
n[0] = char(0x00);
n[1] = char(0x56);
r = c.length(m, n, n+2, 2);
r = c.length(m, n, n + 2, 2);
assert(r == 2);
}
{
@ -335,22 +67,22 @@ int main(int, char**)
C c;
char n[4] = {char(0xD8), char(0xC0), char(0xDC), char(0x03)};
std::mbstate_t m;
int r = c.length(m, n, n+4, 2);
int r = c.length(m, n, n + 4, 2);
assert(r == 0);
n[0] = char(0x10);
n[1] = char(0x05);
r = c.length(m, n, n+2, 2);
r = c.length(m, n, n + 2, 2);
assert(r == 0);
n[0] = char(0x04);
n[1] = char(0x53);
r = c.length(m, n, n+2, 2);
r = c.length(m, n, n + 2, 2);
assert(r == 2);
n[0] = char(0x00);
n[1] = char(0x56);
r = c.length(m, n, n+2, 2);
r = c.length(m, n, n + 2, 2);
assert(r == 2);
}
{
@ -358,22 +90,22 @@ int main(int, char**)
C c;
char n[6] = {char(0xFE), char(0xFF), char(0xD8), char(0xC0), char(0xDC), char(0x03)};
std::mbstate_t m;
int r = c.length(m, n, n+6, 2);
int r = c.length(m, n, n + 6, 2);
assert(r == 2);
n[0] = char(0x10);
n[1] = char(0x05);
r = c.length(m, n, n+2, 2);
r = c.length(m, n, n + 2, 2);
assert(r == 2);
n[0] = char(0x04);
n[1] = char(0x53);
r = c.length(m, n, n+2, 2);
r = c.length(m, n, n + 2, 2);
assert(r == 2);
n[0] = char(0x00);
n[1] = char(0x56);
r = c.length(m, n, n+2, 2);
r = c.length(m, n, n + 2, 2);
assert(r == 2);
}
{
@ -381,22 +113,22 @@ int main(int, char**)
C c;
char n[4] = {char(0xC0), char(0xD8), char(0x03), char(0xDC)};
std::mbstate_t m;
int r = c.length(m, n, n+4, 2);
int r = c.length(m, n, n + 4, 2);
assert(r == 0);
n[1] = char(0x10);
n[0] = char(0x05);
r = c.length(m, n, n+2, 2);
r = c.length(m, n, n + 2, 2);
assert(r == 2);
n[1] = char(0x04);
n[0] = char(0x53);
r = c.length(m, n, n+2, 2);
r = c.length(m, n, n + 2, 2);
assert(r == 2);
n[1] = char(0x00);
n[0] = char(0x56);
r = c.length(m, n, n+2, 2);
r = c.length(m, n, n + 2, 2);
assert(r == 2);
}
{
@ -404,49 +136,194 @@ int main(int, char**)
C c;
char n[4] = {char(0xC0), char(0xD8), char(0x03), char(0xDC)};
std::mbstate_t m;
int r = c.length(m, n, n+4, 2);
int r = c.length(m, n, n + 4, 2);
assert(r == 0);
n[1] = char(0x10);
n[0] = char(0x05);
r = c.length(m, n, n+2, 2);
r = c.length(m, n, n + 2, 2);
assert(r == 0);
n[1] = char(0x04);
n[0] = char(0x53);
r = c.length(m, n, n+2, 2);
r = c.length(m, n, n + 2, 2);
assert(r == 2);
n[1] = char(0x00);
n[0] = char(0x56);
r = c.length(m, n, n+2, 2);
r = c.length(m, n, n + 2, 2);
assert(r == 2);
}
{
typedef std::codecvt_utf16<char16_t, 0x10ffff, std::codecvt_mode(
std::consume_header |
std::little_endian)> C;
typedef std::codecvt_utf16<char16_t, 0x10ffff, std::codecvt_mode(std::consume_header | std::little_endian)> C;
C c;
char n[6] = {char(0xFF), char(0xFE), char(0xC0), char(0xD8), char(0x03), char(0xDC)};
std::mbstate_t m;
int r = c.length(m, n, n+6, 2);
int r = c.length(m, n, n + 6, 2);
assert(r == 2);
n[1] = char(0x10);
n[0] = char(0x05);
r = c.length(m, n, n+2, 2);
r = c.length(m, n, n + 2, 2);
assert(r == 2);
n[1] = char(0x04);
n[0] = char(0x53);
r = c.length(m, n, n+2, 2);
r = c.length(m, n, n + 2, 2);
assert(r == 2);
n[1] = char(0x00);
n[0] = char(0x56);
r = c.length(m, n, n+2, 2);
r = c.length(m, n, n + 2, 2);
assert(r == 2);
}
}
// Checks std::codecvt_utf16<CharT, ...>::length() for 4-byte character types.
//
// Every facet is instantiated with CharT rather than a fixed char32_t, so that
// TestHelper<wchar_t>::test() really exercises the wchar_t facet on platforms
// where wchar_t is four bytes wide.
//
// Each case starts with the UTF-16 surrogate pair D8C0 DC03 (preceded by a
// byte-order mark in the consume_header cases) and then overwrites the first
// code unit with 0x1005, 0x0453 and 0x0056. A result of 0 means the first
// character was rejected (here: it exceeds the facet's Maxcode of 0x1000).
template <class CharT>
void TestHelper<CharT, 4>::test() {
  // Big-endian (default mode), default Maxcode.
  {
    typedef std::codecvt_utf16<CharT> C;
    C c;
    char n[4] = {char(0xD8), char(0xC0), char(0xDC), char(0x03)};
    std::mbstate_t m;
    int r = c.length(m, n, n + 4, 2);
    assert(r == 4);

    n[0] = char(0x10);
    n[1] = char(0x05);
    r = c.length(m, n, n + 2, 2);
    assert(r == 2);

    n[0] = char(0x04);
    n[1] = char(0x53);
    r = c.length(m, n, n + 2, 2);
    assert(r == 2);

    n[0] = char(0x00);
    n[1] = char(0x56);
    r = c.length(m, n, n + 2, 2);
    assert(r == 2);
  }
  // Big-endian, Maxcode limited to 0x1000.
  {
    typedef std::codecvt_utf16<CharT, 0x1000> C;
    C c;
    char n[4] = {char(0xD8), char(0xC0), char(0xDC), char(0x03)};
    std::mbstate_t m;
    int r = c.length(m, n, n + 4, 2);
    assert(r == 0);

    n[0] = char(0x10);
    n[1] = char(0x05);
    r = c.length(m, n, n + 2, 2);
    assert(r == 0);

    n[0] = char(0x04);
    n[1] = char(0x53);
    r = c.length(m, n, n + 2, 2);
    assert(r == 2);

    n[0] = char(0x00);
    n[1] = char(0x56);
    r = c.length(m, n, n + 2, 2);
    assert(r == 2);
  }
  // Big-endian with consume_header: the leading FE FF is counted in the length.
  {
    typedef std::codecvt_utf16<CharT, 0x10ffff, std::consume_header> C;
    C c;
    char n[6] = {char(0xFE), char(0xFF), char(0xD8), char(0xC0), char(0xDC), char(0x03)};
    std::mbstate_t m;
    int r = c.length(m, n, n + 6, 2);
    assert(r == 6);

    n[0] = char(0x10);
    n[1] = char(0x05);
    r = c.length(m, n, n + 2, 2);
    assert(r == 2);

    n[0] = char(0x04);
    n[1] = char(0x53);
    r = c.length(m, n, n + 2, 2);
    assert(r == 2);

    n[0] = char(0x00);
    n[1] = char(0x56);
    r = c.length(m, n, n + 2, 2);
    assert(r == 2);
  }
  // Little-endian, default Maxcode: the bytes of each code unit are swapped.
  {
    typedef std::codecvt_utf16<CharT, 0x10ffff, std::little_endian> C;
    C c;
    char n[4] = {char(0xC0), char(0xD8), char(0x03), char(0xDC)};
    std::mbstate_t m;
    int r = c.length(m, n, n + 4, 2);
    assert(r == 4);

    n[1] = char(0x10);
    n[0] = char(0x05);
    r = c.length(m, n, n + 2, 2);
    assert(r == 2);

    n[1] = char(0x04);
    n[0] = char(0x53);
    r = c.length(m, n, n + 2, 2);
    assert(r == 2);

    n[1] = char(0x00);
    n[0] = char(0x56);
    r = c.length(m, n, n + 2, 2);
    assert(r == 2);
  }
  // Little-endian, Maxcode limited to 0x1000.
  {
    typedef std::codecvt_utf16<CharT, 0x1000, std::little_endian> C;
    C c;
    char n[4] = {char(0xC0), char(0xD8), char(0x03), char(0xDC)};
    std::mbstate_t m;
    int r = c.length(m, n, n + 4, 2);
    assert(r == 0);

    n[1] = char(0x10);
    n[0] = char(0x05);
    r = c.length(m, n, n + 2, 2);
    assert(r == 0);

    n[1] = char(0x04);
    n[0] = char(0x53);
    r = c.length(m, n, n + 2, 2);
    assert(r == 2);

    n[1] = char(0x00);
    n[0] = char(0x56);
    r = c.length(m, n, n + 2, 2);
    assert(r == 2);
  }
  // Little-endian with consume_header: the leading FF FE is counted in the length.
  {
    typedef std::codecvt_utf16<CharT, 0x10ffff, std::codecvt_mode(std::consume_header | std::little_endian)> C;
    C c;
    char n[6] = {char(0xFF), char(0xFE), char(0xC0), char(0xD8), char(0x03), char(0xDC)};
    std::mbstate_t m;
    int r = c.length(m, n, n + 6, 2);
    assert(r == 6);

    n[1] = char(0x10);
    n[0] = char(0x05);
    r = c.length(m, n, n + 2, 2);
    assert(r == 2);

    n[1] = char(0x04);
    n[0] = char(0x53);
    r = c.length(m, n, n + 2, 2);
    assert(r == 2);

    n[1] = char(0x00);
    n[0] = char(0x56);
    r = c.length(m, n, n + 2, 2);
    assert(r == 2);
  }
}
// Test entry point: runs the codecvt_utf16 length checks once per character
// type. TestHelper picks its specialization from sizeof(CharT), so wchar_t
// lands in the 2-byte or the 4-byte variant depending on the platform.
int main(int, char**) {
TestHelper<wchar_t>::test();
TestHelper<char16_t>::test();
TestHelper<char32_t>::test();
return 0;
}

View File

@ -23,44 +23,54 @@
#include "test_macros.h"
int main(int, char**)
{
// Test driver keyed on the width of the character type. The defaulted second
// template argument captures sizeof(CharT); only the widths 2 and 4 are
// specialized, so any other width leaves TestHelper incomplete.
template <typename CharT, size_t Width = sizeof(CharT)>
struct TestHelper;

// Selected for character types that are two bytes wide.
template <typename CharT>
struct TestHelper<CharT, 2> {
  static void test();
};

// Selected for character types that are four bytes wide.
template <typename CharT>
struct TestHelper<CharT, 4> {
  static void test();
};
template <class CharT>
void TestHelper<CharT, 2>::test() {
{
typedef std::codecvt_utf16<wchar_t> C;
C c;
int r = c.max_length();
assert(r == 4);
}
{
typedef std::codecvt_utf16<wchar_t, 0xFFFFFFFF, std::consume_header> C;
C c;
int r = c.max_length();
assert(r == 6);
}
{
typedef std::codecvt_utf16<char16_t> C;
typedef std::codecvt_utf16<CharT> C;
C c;
int r = c.max_length();
assert(r == 2);
}
{
typedef std::codecvt_utf16<char16_t, 0xFFFFFFFF, std::consume_header> C;
typedef std::codecvt_utf16<CharT, 0xFFFFFFFF, std::consume_header> C;
C c;
int r = c.max_length();
assert(r == 4);
}
}
template <class CharT>
void TestHelper<CharT, 4>::test() {
{
typedef std::codecvt_utf16<CharT> C;
C c;
int r = c.max_length();
assert(r == 4);
}
{
typedef std::codecvt_utf16<char32_t> C;
C c;
int r = c.max_length();
assert(r == 4);
}
{
typedef std::codecvt_utf16<char32_t, 0xFFFFFFFF, std::consume_header> C;
typedef std::codecvt_utf16<CharT, 0xFFFFFFFF, std::consume_header> C;
C c;
int r = c.max_length();
assert(r == 6);
}
}
// Test entry point: runs the codecvt_utf16 max_length checks once per
// character type. TestHelper picks its specialization from sizeof(CharT), so
// wchar_t lands in the 2-byte or the 4-byte variant depending on the platform.
int main(int, char**) {
TestHelper<wchar_t>::test();
TestHelper<char16_t>::test();
TestHelper<char32_t>::test();
return 0;
}

View File

@ -24,66 +24,155 @@
#include "test_macros.h"
int main(int, char**)
{
// Test driver keyed on the width of the character type. The defaulted second
// template argument captures sizeof(CharT); only the widths 2 and 4 are
// specialized, so any other width leaves TestHelper incomplete.
template <typename CharT, size_t Width = sizeof(CharT)>
struct TestHelper;

// Selected for character types that are two bytes wide.
template <typename CharT>
struct TestHelper<CharT, 2> {
  static void test();
};

// Selected for character types that are four bytes wide.
template <typename CharT>
struct TestHelper<CharT, 4> {
  static void test();
};
template <class CharT>
void TestHelper<CharT, 2>::test() {
{
typedef std::codecvt_utf8<wchar_t> C;
typedef std::codecvt_utf8<CharT> C;
C c;
char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)};
std::mbstate_t m;
int r = c.length(m, n, n+4, 1);
int r = c.length(m, n, n + 4, 1);
assert(r == 0);
n[0] = char(0xE1);
n[1] = char(0x80);
n[2] = char(0x85);
r = c.length(m, n, n + 3, 2);
assert(r == 3);
n[0] = char(0xD1);
n[1] = char(0x93);
r = c.length(m, n, n + 2, 3);
assert(r == 2);
n[0] = char(0x56);
r = c.length(m, n, n + 1, 3);
assert(r == 1);
}
{
typedef std::codecvt_utf8<CharT, 0x1000> C;
C c;
char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)};
std::mbstate_t m;
int r = c.length(m, n, n + 4, 1);
assert(r == 0);
n[0] = char(0xE1);
n[1] = char(0x80);
n[2] = char(0x85);
r = c.length(m, n, n + 3, 2);
assert(r == 0);
n[0] = char(0xD1);
n[1] = char(0x93);
r = c.length(m, n, n + 2, 3);
assert(r == 2);
n[0] = char(0x56);
r = c.length(m, n, n + 1, 3);
assert(r == 1);
}
{
typedef std::codecvt_utf8<CharT, 0xFFFFFFFF, std::consume_header> C;
C c;
char n[7] = {char(0xEF), char(0xBB), char(0xBF), char(0xF1), char(0x80), char(0x80), char(0x83)};
std::mbstate_t m;
int r = c.length(m, n, n + 7, 1);
assert(r == 3);
n[0] = char(0xE1);
n[1] = char(0x80);
n[2] = char(0x85);
r = c.length(m, n, n + 3, 2);
assert(r == 3);
n[0] = char(0xEF);
n[1] = char(0xBB);
n[2] = char(0xBF);
n[3] = char(0xD1);
n[4] = char(0x93);
r = c.length(m, n, n + 5, 3);
assert(r == 5);
n[0] = char(0x56);
r = c.length(m, n, n + 1, 3);
assert(r == 1);
}
}
template <class CharT>
void TestHelper<CharT, 4>::test() {
{
typedef std::codecvt_utf8<CharT> C;
C c;
char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)};
std::mbstate_t m;
int r = c.length(m, n, n + 4, 1);
assert(r == 4);
n[0] = char(0xE1);
n[1] = char(0x80);
n[2] = char(0x85);
r = c.length(m, n, n+3, 2);
r = c.length(m, n, n + 3, 2);
assert(r == 3);
n[0] = char(0xD1);
n[1] = char(0x93);
r = c.length(m, n, n+2, 3);
r = c.length(m, n, n + 2, 3);
assert(r == 2);
n[0] = char(0x56);
r = c.length(m, n, n+1, 3);
r = c.length(m, n, n + 1, 3);
assert(r == 1);
}
{
typedef std::codecvt_utf8<wchar_t, 0x1000> C;
typedef std::codecvt_utf8<CharT, 0x1000> C;
C c;
char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)};
std::mbstate_t m;
int r = c.length(m, n, n+4, 1);
int r = c.length(m, n, n + 4, 1);
assert(r == 0);
n[0] = char(0xE1);
n[1] = char(0x80);
n[2] = char(0x85);
r = c.length(m, n, n+3, 2);
r = c.length(m, n, n + 3, 2);
assert(r == 0);
n[0] = char(0xD1);
n[1] = char(0x93);
r = c.length(m, n, n+2, 3);
r = c.length(m, n, n + 2, 3);
assert(r == 2);
n[0] = char(0x56);
r = c.length(m, n, n+1, 3);
r = c.length(m, n, n + 1, 3);
assert(r == 1);
}
{
typedef std::codecvt_utf8<wchar_t, 0xFFFFFFFF, std::consume_header> C;
typedef std::codecvt_utf8<CharT, 0xFFFFFFFF, std::consume_header> C;
C c;
char n[7] = {char(0xEF), char(0xBB), char(0xBF), char(0xF1), char(0x80), char(0x80), char(0x83)};
std::mbstate_t m;
int r = c.length(m, n, n+7, 1);
int r = c.length(m, n, n + 7, 1);
assert(r == 7);
n[0] = char(0xE1);
n[1] = char(0x80);
n[2] = char(0x85);
r = c.length(m, n, n+3, 2);
r = c.length(m, n, n + 3, 2);
assert(r == 3);
n[0] = char(0xEF);
@ -91,157 +180,18 @@ int main(int, char**)
n[2] = char(0xBF);
n[3] = char(0xD1);
n[4] = char(0x93);
r = c.length(m, n, n+5, 3);
r = c.length(m, n, n + 5, 3);
assert(r == 5);
n[0] = char(0x56);
r = c.length(m, n, n+1, 3);
assert(r == 1);
}
{
typedef std::codecvt_utf8<char32_t> C;
C c;
char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)};
std::mbstate_t m;
int r = c.length(m, n, n+4, 1);
assert(r == 4);
n[0] = char(0xE1);
n[1] = char(0x80);
n[2] = char(0x85);
r = c.length(m, n, n+3, 2);
assert(r == 3);
n[0] = char(0xD1);
n[1] = char(0x93);
r = c.length(m, n, n+2, 3);
assert(r == 2);
n[0] = char(0x56);
r = c.length(m, n, n+1, 3);
assert(r == 1);
}
{
typedef std::codecvt_utf8<char32_t, 0x1000> C;
C c;
char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)};
std::mbstate_t m;
int r = c.length(m, n, n+4, 1);
assert(r == 0);
n[0] = char(0xE1);
n[1] = char(0x80);
n[2] = char(0x85);
r = c.length(m, n, n+3, 2);
assert(r == 0);
n[0] = char(0xD1);
n[1] = char(0x93);
r = c.length(m, n, n+2, 3);
assert(r == 2);
n[0] = char(0x56);
r = c.length(m, n, n+1, 3);
assert(r == 1);
}
{
typedef std::codecvt_utf8<char32_t, 0xFFFFFFFF, std::consume_header> C;
C c;
char n[7] = {char(0xEF), char(0xBB), char(0xBF), char(0xF1), char(0x80), char(0x80), char(0x83)};
std::mbstate_t m;
int r = c.length(m, n, n+7, 1);
assert(r == 7);
n[0] = char(0xE1);
n[1] = char(0x80);
n[2] = char(0x85);
r = c.length(m, n, n+3, 2);
assert(r == 3);
n[0] = char(0xEF);
n[1] = char(0xBB);
n[2] = char(0xBF);
n[3] = char(0xD1);
n[4] = char(0x93);
r = c.length(m, n, n+5, 3);
assert(r == 5);
n[0] = char(0x56);
r = c.length(m, n, n+1, 3);
assert(r == 1);
}
{
typedef std::codecvt_utf8<char16_t> C;
C c;
char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)};
std::mbstate_t m;
int r = c.length(m, n, n+4, 1);
assert(r == 0);
n[0] = char(0xE1);
n[1] = char(0x80);
n[2] = char(0x85);
r = c.length(m, n, n+3, 2);
assert(r == 3);
n[0] = char(0xD1);
n[1] = char(0x93);
r = c.length(m, n, n+2, 3);
assert(r == 2);
n[0] = char(0x56);
r = c.length(m, n, n+1, 3);
assert(r == 1);
}
{
typedef std::codecvt_utf8<char16_t, 0x1000> C;
C c;
char n[4] = {char(0xF1), char(0x80), char(0x80), char(0x83)};
std::mbstate_t m;
int r = c.length(m, n, n+4, 1);
assert(r == 0);
n[0] = char(0xE1);
n[1] = char(0x80);
n[2] = char(0x85);
r = c.length(m, n, n+3, 2);
assert(r == 0);
n[0] = char(0xD1);
n[1] = char(0x93);
r = c.length(m, n, n+2, 3);
assert(r == 2);
n[0] = char(0x56);
r = c.length(m, n, n+1, 3);
assert(r == 1);
}
{
typedef std::codecvt_utf8<char16_t, 0xFFFFFFFF, std::consume_header> C;
C c;
char n[7] = {char(0xEF), char(0xBB), char(0xBF), char(0xF1), char(0x80), char(0x80), char(0x83)};
std::mbstate_t m;
int r = c.length(m, n, n+7, 1);
assert(r == 3);
n[0] = char(0xE1);
n[1] = char(0x80);
n[2] = char(0x85);
r = c.length(m, n, n+3, 2);
assert(r == 3);
n[0] = char(0xEF);
n[1] = char(0xBB);
n[2] = char(0xBF);
n[3] = char(0xD1);
n[4] = char(0x93);
r = c.length(m, n, n+5, 3);
assert(r == 5);
n[0] = char(0x56);
r = c.length(m, n, n+1, 3);
r = c.length(m, n, n + 1, 3);
assert(r == 1);
}
}
// Test entry point: runs the codecvt_utf8 length checks once per character
// type. TestHelper picks its specialization from sizeof(CharT), so wchar_t
// lands in the 2-byte or the 4-byte variant depending on the platform.
int main(int, char**) {
TestHelper<wchar_t>::test();
TestHelper<char32_t>::test();
TestHelper<char16_t>::test();
return 0;
}

View File

@ -23,44 +23,54 @@
#include "test_macros.h"
int main(int, char**)
{
// Test driver keyed on the width of the character type. The defaulted second
// template argument captures sizeof(CharT); only the widths 2 and 4 are
// specialized, so any other width leaves TestHelper incomplete.
template <typename CharT, size_t Width = sizeof(CharT)>
struct TestHelper;

// Selected for character types that are two bytes wide.
template <typename CharT>
struct TestHelper<CharT, 2> {
  static void test();
};

// Selected for character types that are four bytes wide.
template <typename CharT>
struct TestHelper<CharT, 4> {
  static void test();
};
template <class CharT>
void TestHelper<CharT, 2>::test() {
{
typedef std::codecvt_utf8<wchar_t> C;
C c;
int r = c.max_length();
assert(r == 4);
}
{
typedef std::codecvt_utf8<wchar_t, 0xFFFFFFFF, std::consume_header> C;
C c;
int r = c.max_length();
assert(r == 7);
}
{
typedef std::codecvt_utf8<char16_t> C;
typedef std::codecvt_utf8<CharT> C;
C c;
int r = c.max_length();
assert(r == 3);
}
{
typedef std::codecvt_utf8<char16_t, 0xFFFFFFFF, std::consume_header> C;
typedef std::codecvt_utf8<CharT, 0xFFFFFFFF, std::consume_header> C;
C c;
int r = c.max_length();
assert(r == 6);
}
}
template <class CharT>
void TestHelper<CharT, 4>::test() {
{
typedef std::codecvt_utf8<char32_t> C;
typedef std::codecvt_utf8<CharT> C;
C c;
int r = c.max_length();
assert(r == 4);
}
{
typedef std::codecvt_utf8<char32_t, 0xFFFFFFFF, std::consume_header> C;
typedef std::codecvt_utf8<CharT, 0xFFFFFFFF, std::consume_header> C;
C c;
int r = c.max_length();
assert(r == 7);
}
}
// Test entry point: runs the codecvt_utf8 max_length checks once per
// character type. TestHelper picks its specialization from sizeof(CharT), so
// wchar_t lands in the 2-byte or the 4-byte variant depending on the platform.
int main(int, char**) {
TestHelper<wchar_t>::test();
TestHelper<char16_t>::test();
TestHelper<char32_t>::test();
return 0;
}