From e80748ff8840a10bd7c7336eb5e98664480ba1ba Mon Sep 17 00:00:00 2001 From: Kaining Zhong Date: Tue, 7 Jun 2022 19:39:46 +0200 Subject: [PATCH] [clang-diff] Fix assertion error when dealing with wide strings Directly using StringLiteral::getString for wide string is not currently supported; therefore in ASTDiff, getStmtValue will fail when asserting that the StringLiteral has a width of 1. This patch also covers cases for UTF16 and UTF32 encoding, along with corresponding test cases. Fixes https://github.com/llvm/llvm-project/issues/55771. Reviewed By: johannes Differential Revision: https://reviews.llvm.org/D126651 --- clang/lib/Tooling/ASTDiff/ASTDiff.cpp | 24 +++++++++++++++++++++++- clang/test/Tooling/clang-diff-ast.cpp | 6 ++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/clang/lib/Tooling/ASTDiff/ASTDiff.cpp b/clang/lib/Tooling/ASTDiff/ASTDiff.cpp index 0821863adcc6..786def58076a 100644 --- a/clang/lib/Tooling/ASTDiff/ASTDiff.cpp +++ b/clang/lib/Tooling/ASTDiff/ASTDiff.cpp @@ -16,6 +16,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/PriorityQueue.h" +#include "llvm/Support/ConvertUTF.h" #include #include @@ -463,8 +464,29 @@ std::string SyntaxTree::Impl::getStmtValue(const Stmt *S) const { } if (auto *D = dyn_cast(S)) return getRelativeName(D->getDecl(), getEnclosingDeclContext(AST, S)); - if (auto *String = dyn_cast(S)) + if (auto *String = dyn_cast(S)) { + if (String->isWide() || String->isUTF16() || String->isUTF32()) { + std::string UTF8Str; + unsigned int NumChars = String->getLength(); + const char *Bytes = String->getBytes().data(); + if (String->isWide()) { + const auto *Chars = reinterpret_cast(Bytes); + if (!convertWideToUTF8({Chars, NumChars}, UTF8Str)) + return ""; + } else if (String->isUTF16()) { + const auto *Chars = reinterpret_cast(Bytes); + if (!convertUTF16ToUTF8String({Chars, NumChars}, UTF8Str)) + return ""; + } else { + assert(String->isUTF32() && "Unsupported string encoding."); + const auto *Chars = reinterpret_cast(Bytes); + if (!convertUTF32ToUTF8String({Chars, NumChars}, UTF8Str)) + return ""; + } + return UTF8Str; + } return std::string(String->getString()); + } if (auto *B = dyn_cast(S)) return B->getValue() ? "true" : "false"; return ""; diff --git a/clang/test/Tooling/clang-diff-ast.cpp b/clang/test/Tooling/clang-diff-ast.cpp index a8efda50a405..e67128a098ef 100644 --- a/clang/test/Tooling/clang-diff-ast.cpp +++ b/clang/test/Tooling/clang-diff-ast.cpp @@ -47,6 +47,12 @@ class X : Base { if (i == 0) // CHECK: StringLiteral: foo( return "foo"; + // CHECK: StringLiteral: wide( + (void)L"wide"; + // CHECK: StringLiteral: utf-16( + (void)u"utf-16"; + // CHECK: StringLiteral: utf-32( + (void)U"utf-32"; // CHECK-NOT: ImplicitCastExpr return 0; }