From 817550919e78ba9bb8336685fe1f40e4f650b2e4 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 6 May 2022 01:44:41 +0200 Subject: [PATCH] [Lex] Don't assert when decoding invalid UCNs. Currently if a lexically-valid UCN encodes an invalid codepoint, then we diagnose that, and then hit an assertion while trying to decode it. Since there isn't anything preventing us reaching this state, remove the assertion. expandUCNs("X\UAAAAAAAAY") will produce "XY". Differential Revision: https://reviews.llvm.org/D125059 --- clang/lib/Lex/LiteralSupport.cpp | 6 ++---- clang/test/Lexer/unicode.c | 3 +++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp index 6e6fd361ebf9..9a30a41c851d 100644 --- a/clang/lib/Lex/LiteralSupport.cpp +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -320,10 +320,8 @@ static void appendCodePoint(unsigned Codepoint, llvm::SmallVectorImpl<char> &Str) { char ResultBuf[4]; char *ResultPtr = ResultBuf; - bool Res = llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr); - (void)Res; - assert(Res && "Unexpected conversion failure"); - Str.append(ResultBuf, ResultPtr); + if (llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr)) + Str.append(ResultBuf, ResultPtr); } void clang::expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) { diff --git a/clang/test/Lexer/unicode.c b/clang/test/Lexer/unicode.c index f67b55415f96..b0cc28cfb915 100644 --- a/clang/test/Lexer/unicode.c +++ b/clang/test/Lexer/unicode.c @@ -28,6 +28,9 @@ CHECK : The preprocessor should not complain about Unicode characters like ©. int _; +extern int X\UAAAAAAAA; // expected-error {{not allowed in an identifier}} +int Y = '\UAAAAAAAA'; // expected-error {{invalid universal character}} + #ifdef __cplusplus extern int ༀ;