forked from OSchip/llvm-project
[clang][lexer] Allow u8 character literal prefixes in C2x
Implement N2418 for C2x. Differential Revision: https://reviews.llvm.org/D119221
This commit is contained in:
parent
653de14f17
commit
33ec653055
|
@@ -226,6 +226,7 @@ C2x Feature Support
|
|||
- Implemented `WG14 N2775 Literal suffixes for bit-precise integers <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2775.pdf>`_.
|
||||
- Implemented the `*_WIDTH` macros to complete support for
|
||||
`WG14 N2412 Two's complement sign representation for C2x <https://www9.open-std.org/jtc1/sc22/wg14/www/docs/n2412.pdf>`_.
|
||||
- Implemented `WG14 N2418 Adding the u8 character prefix <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2418.pdf>`_.
|
||||
|
||||
C++ Language Changes in Clang
|
||||
-----------------------------
|
||||
|
|
|
@@ -3459,7 +3459,10 @@ LexNextToken:
|
|||
MIOpt.ReadToken();
|
||||
return LexNumericConstant(Result, CurPtr);
|
||||
|
||||
case 'u': // Identifier (uber) or C11/C++11 UTF-8 or UTF-16 string literal
|
||||
// Identifier (e.g., uber), or
|
||||
// UTF-8 (C2x/C++17) or UTF-16 (C11/C++11) character literal, or
|
||||
// UTF-8 or UTF-16 string literal (C11/C++11).
|
||||
case 'u':
|
||||
// Notify MIOpt that we read a non-whitespace/non-comment token.
|
||||
MIOpt.ReadToken();
|
||||
|
||||
|
@@ -3493,7 +3496,7 @@ LexNextToken:
|
|||
ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
|
||||
SizeTmp2, Result),
|
||||
tok::utf8_string_literal);
|
||||
if (Char2 == '\'' && LangOpts.CPlusPlus17)
|
||||
if (Char2 == '\'' && (LangOpts.CPlusPlus17 || LangOpts.C2x))
|
||||
return LexCharConstant(
|
||||
Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
|
||||
SizeTmp2, Result),
|
||||
|
@@ -3517,7 +3520,7 @@ LexNextToken:
|
|||
// treat u like the start of an identifier.
|
||||
return LexIdentifierContinue(Result, CurPtr);
|
||||
|
||||
case 'U': // Identifier (Uber) or C11/C++11 UTF-32 string literal
|
||||
case 'U': // Identifier (e.g. Uber) or C11/C++11 UTF-32 string literal
|
||||
// Notify MIOpt that we read a non-whitespace/non-comment token.
|
||||
MIOpt.ReadToken();
|
||||
|
||||
|
|
|
@@ -3609,6 +3609,8 @@ ExprResult Sema::ActOnCharacterConstant(const Token &Tok, Scope *UDLScope) {
|
|||
QualType Ty;
|
||||
if (Literal.isWide())
|
||||
Ty = Context.WideCharTy; // L'x' -> wchar_t in C and C++.
|
||||
else if (Literal.isUTF8() && getLangOpts().C2x)
|
||||
Ty = Context.UnsignedCharTy; // u8'x' -> unsigned char in C2x
|
||||
else if (Literal.isUTF8() && getLangOpts().Char8)
|
||||
Ty = Context.Char8Ty; // u8'x' -> char8_t when it exists.
|
||||
else if (Literal.isUTF16())
|
||||
|
@@ -3618,7 +3620,8 @@ ExprResult Sema::ActOnCharacterConstant(const Token &Tok, Scope *UDLScope) {
|
|||
else if (!getLangOpts().CPlusPlus || Literal.isMultiChar())
|
||||
Ty = Context.IntTy; // 'x' -> int in C, 'wxyz' -> int in C++.
|
||||
else
|
||||
Ty = Context.CharTy; // 'x' -> char in C++
|
||||
Ty = Context.CharTy; // 'x' -> char in C++;
|
||||
// u8'x' -> char in C11-C17 and in C++ without char8_t.
|
||||
|
||||
CharacterLiteral::CharacterKind Kind = CharacterLiteral::Ascii;
|
||||
if (Literal.isWide())
|
||||
|
|
|
@@ -1,5 +1,6 @@
|
|||
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -fsyntax-only -verify %s
|
||||
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c11 -x c -fsyntax-only -verify %s
|
||||
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c2x -x c -fsyntax-only -verify %s
|
||||
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++1z -fsyntax-only -verify %s
|
||||
|
||||
int array0[u'ñ' == u'\xf1'? 1 : -1];
|
||||
|
@@ -12,4 +13,16 @@ char c = u8'\u0080'; // expected-error {{character too large for enclosing chara
|
|||
char d = u8'\u1234'; // expected-error {{character too large for enclosing character literal type}}
|
||||
char e = u8'ሴ'; // expected-error {{character too large for enclosing character literal type}}
|
||||
char f = u8'ab'; // expected-error {{Unicode character literals may not contain multiple characters}}
|
||||
#elif __STDC_VERSION__ > 202000L
|
||||
char a = u8'ñ'; // expected-error {{character too large for enclosing character literal type}}
|
||||
char b = u8'\x80'; // ok
|
||||
char c = u8'\u0080'; // expected-error {{universal character name refers to a control character}}
|
||||
char d = u8'\u1234'; // expected-error {{character too large for enclosing character literal type}}
|
||||
char e = u8'ሴ'; // expected-error {{character too large for enclosing character literal type}}
|
||||
char f = u8'ab'; // expected-error {{Unicode character literals may not contain multiple characters}}
|
||||
_Static_assert(
|
||||
_Generic(u8'a',
|
||||
default : 0,
|
||||
unsigned char : 1),
|
||||
"Surprise!");
|
||||
#endif
|
||||
|
|
|
@@ -720,7 +720,7 @@ conformance.</p>
|
|||
<tr>
|
||||
<td>Adding the u8 character prefix</td>
|
||||
<td><a href="http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2418.pdf">N2418</a></td>
|
||||
<td class="none" align="center">No</td>
|
||||
<td class="unreleased" align="center">Clang 15</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Remove support for function definitions with identifier lists</td>
|
||||
|
|
Loading…
Reference in New Issue