Make wide multi-character character literals ill-formed

This implements P2362, which has not yet been approved by the
C++ committee, but because wide multi-character literals are
implementation-defined, clang might not have to wait for WG21.

This change is also being applied in C mode as the behavior is
implementation-defined in C as well and there's no benefit to
having different rules between the languages.

The other part of P2362, making non-representable character
literals ill-formed, is already implemented by clang.
This commit is contained in:
Corentin Jabot 2021-08-20 11:10:53 -04:00 committed by Aaron Ballman
parent c7aacce304
commit bdeda959ab
9 changed files with 26 additions and 41 deletions

View File

@ -100,7 +100,9 @@ Windows Support
C Language Changes in Clang
---------------------------
- ...
- Wide multi-character literals such as ``L'ab'`` that would previously be interpreted as ``L'b'``
  are now ill-formed in all language modes. The motivation for this change is outlined in
  `P2362 <https://wg21.link/P2362>`_.
C++ Language Changes in Clang
-----------------------------

View File

@ -183,12 +183,10 @@ def warn_c2x_compat_digit_separator : Warning<
InGroup<CPre2xCompat>, DefaultIgnore;
def err_digit_separator_not_between_digits : Error<
"digit separator cannot appear at %select{start|end}0 of digit sequence">;
def warn_extraneous_char_constant : Warning<
"extraneous characters in character constant ignored">;
def warn_char_constant_too_large : Warning<
"character constant too long for its type">;
def err_multichar_utf_character_literal : Error<
"Unicode character literals may not contain multiple characters">;
def err_multichar_character_literal : Error<
"%select{wide|Unicode}0 character literals may not contain multiple characters">;
def err_exponent_has_no_digits : Error<"exponent has no digits">;
def err_hex_constant_requires : Error<
"hexadecimal floating %select{constant|literal}0 requires "

View File

@ -1390,14 +1390,14 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
unsigned NumCharsSoFar = buffer_begin - &codepoint_buffer.front();
if (NumCharsSoFar > 1) {
if (isWide())
PP.Diag(Loc, diag::warn_extraneous_char_constant);
else if (isAscii() && NumCharsSoFar == 4)
if (isAscii() && NumCharsSoFar == 4)
PP.Diag(Loc, diag::warn_four_char_character_literal);
else if (isAscii())
PP.Diag(Loc, diag::warn_multichar_character_literal);
else
PP.Diag(Loc, diag::err_multichar_utf_character_literal);
else {
PP.Diag(Loc, diag::err_multichar_character_literal) << (isWide() ? 0 : 1);
HadError = true;
}
IsMultiChar = true;
} else {
IsMultiChar = false;

View File

@ -1,5 +1,4 @@
// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-C %s
// RUN: %clang_cc1 -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-C %s
// RUN: %clang_cc1 -x c++ -std=c++11 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-CPP0X %s
#include <stddef.h>
@ -33,11 +32,6 @@ int main() {
// CHECK-CPP0X: store i32 97
wchar_t wa = L'a';
// Should pick second character.
// CHECK-C: store i32 98
// CHECK-CPP0X: store i32 98
wchar_t wb = L'ab';
#if __cplusplus >= 201103L
// CHECK-CPP0X: store i16 97
char16_t ua = u'a';
@ -83,8 +77,4 @@ int main() {
char32_t Ud = U'\U0010F00B';
#endif
// Should pick second character.
// CHECK-C: store i32 1110027
// CHECK-CPP0X: store i32 1110027
wchar_t we = L'\u1234\U0010F00B';
}

View File

@ -1,11 +1,14 @@
// RUN: %clang_cc1 -x c++ -triple %itanium_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=ITANIUM
// RUN: %clang_cc1 -x c++ -triple %ms_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=MSABI
// Runs in c++ mode so that wchar_t is available.
// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=ITANIUM
// RUN: %clang_cc1 -triple %ms_abi_triple -emit-llvm -fwchar-type=short -fno-signed-wchar %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=MSABI
// Run in C mode as wide multichar literals are not valid in C++
// XFAIL: hexagon
// Hexagon aligns arrays of size 8+ bytes to a 64-bit boundary, which fails
// the first check line with "align 1".
typedef __WCHAR_TYPE__ wchar_t;
int main() {
// This should convert to utf8.
// CHECK: private unnamed_addr constant [10 x i8] c"\E1\84\A0\C8\A0\F4\82\80\B0\00", align 1
@ -20,8 +23,6 @@ int main() {
// MSABI: linkonce_odr dso_local unnamed_addr constant [5 x i16] [i16 4384, i16 544, i16 -9272, i16 -9168, i16 0]
const wchar_t *bar = L"\u1120\u0220\U00102030";
// Should pick second character.
// CHECK: store i8 98
char c = 'ab';
@ -29,10 +30,6 @@ int main() {
// CHECK: store i16 97
wchar_t wa = L'a';
// Should pick second character.
// CHECK: store i16 98
wchar_t wb = L'ab';
// -4085 == 0xf00b
// CHECK: store i16 -4085
wchar_t wc = L'\uF00B';

View File

@ -21,7 +21,8 @@ auto f = '\xE2\x8C\x98'; // expected-warning {{multi-character character constan
char16_t g = u'ab'; // expected-error {{Unicode character literals may not contain multiple characters}}
char16_t h = u'\U0010FFFD'; // expected-error {{character too large for enclosing character literal type}}
wchar_t i = L'ab'; // expected-warning {{extraneous characters in character constant ignored}}
wchar_t i = L'ab'; // expected-error {{wide character literals may not contain multiple characters}}
wchar_t j = L'\U0010FFFD';
char32_t k = U'\U0010FFFD';

View File

@ -3,10 +3,8 @@
void f() {
(void)L"\U00010000"; // unicode escape produces UTF-16 sequence, so no warning
(void)L'\U00010000'; // expected-error {{character too large for enclosing character literal type}}
(void)L'ab'; // expected-error {{wide character literals may not contain multiple characters}}
(void)L'ab'; // expected-warning {{extraneous characters in character constant ignored}}
(void)L'a\u1000'; // expected-warning {{extraneous characters in character constant ignored}}
(void)L'a\u1000'; // expected-error {{wide character literals may not contain multiple characters}}
}

View File

@ -18,7 +18,7 @@ This test serves two purposes:
The list of warnings below should NEVER grow. It should gradually shrink to 0.
CHECK: Warnings without flags (68):
CHECK: Warnings without flags (67):
CHECK-NEXT: ext_expected_semi_decl_list
CHECK-NEXT: ext_explicit_specialization_storage_class
@ -50,7 +50,6 @@ CHECK-NEXT: warn_drv_pch_not_first_include
CHECK-NEXT: warn_dup_category_def
CHECK-NEXT: warn_enum_value_overflow
CHECK-NEXT: warn_expected_qualified_after_typename
CHECK-NEXT: warn_extraneous_char_constant
CHECK-NEXT: warn_fe_backend_unsupported
CHECK-NEXT: warn_fe_cc_log_diagnostics_failure
CHECK-NEXT: warn_fe_cc_print_header_failure

View File

@ -10,21 +10,21 @@ void foo(void) // expected-warning {{no previous prototype for function}}
// expected-note@-1{{declare 'static' if the function is not intended to be used outside of this translation unit}}
{
// A diagnostic without DefaultIgnore, and not part of a group.
(void) L'ab'; // expected-warning {{extraneous characters in character constant ignored}}
(void) 'ab'; // expected-warning {{multi-character character constant}}
#pragma clang diagnostic warning "-Weverything" // Should not change anyhting.
#define UNUSED_MACRO2 1 // expected-warning{{macro is not used}}
(void) L'cd'; // expected-warning {{extraneous characters in character constant ignored}}
(void) 'cd'; // expected-warning {{multi-character character constant}}
#pragma clang diagnostic ignored "-Weverything" // Ignore warnings now.
#define UNUSED_MACRO2 1 // no warning
(void) L'ef'; // no warning here
(void) 'ef'; // no warning here
#pragma clang diagnostic warning "-Weverything" // Revert back to warnings.
#define UNUSED_MACRO3 1 // expected-warning{{macro is not used}}
(void) L'gh'; // expected-warning {{extraneous characters in character constant ignored}}
(void) 'gh'; // expected-warning {{multi-character character constant}}
#pragma clang diagnostic error "-Weverything" // Give errors now.
#define UNUSED_MACRO4 1 // expected-error{{macro is not used}}
(void) L'ij'; // expected-error {{extraneous characters in character constant ignored}}
(void) 'ij'; // expected-error {{multi-character character constant}}
}