[YAML] Fix UTF-8 handling

Previous YAML quoting patches broke UTF-8 printing in YAML: see https://reviews.llvm.org/D41290#961801.

Differential Revision: https://reviews.llvm.org/D41490

llvm-svn: 321283
This commit is contained in:
Francis Visoiu Mistrih 2017-12-21 17:14:09 +00:00
parent 2e0f7bd0fe
commit b2b961a3db
2 changed files with 34 additions and 1 deletions

View File

@ -657,7 +657,12 @@ void Output::scalarString(StringRef &S, QuotingType MustQuote) {
}
i = j + 1;
} else if (MustQuote == QuotingType::Double &&
!sys::unicode::isPrintable(S[j])) {
!sys::unicode::isPrintable(S[j]) && (S[j] & 0x80) == 0) {
// If we're double quoting non-printable characters, we prefer printing
// them as "\x" + their hex representation. Note that special casing is
// needed for UTF-8, where a byte may be part of a UTF-8 sequence and
// appear as non-printable, in which case we want to print the correct
// unicode character and not its hex representation.
output(StringRef(&Base[i], j - i)); // "flush"
output(StringLiteral("\\x"));

View File

@ -2541,3 +2541,31 @@ TEST(YAMLIO, TestEscapedSingleQuoteInsideSingleQuote) {
ostr.flush();
EXPECT_EQ("'abc''fdf'", out);
}
// A scalar that mixes single quotes with multi-byte UTF-8 (Cyrillic) text is
// expected to be emitted wrapped in double quotes, with the single quotes and
// the UTF-8 bytes passed through unchanged (no "\x" hex escapes).
TEST(YAMLIO, TestEscapedUTF8SingleQuoteInsideDoubleQuote) {
  // Sink that collects everything the YAML writer emits.
  std::string Emitted;
  llvm::raw_string_ostream Stream(Emitted);
  Output Writer(Stream, nullptr, 0);
  llvm::yaml::EmptyContext Ctx;

  std::string Text = "parameter 'параметр' is unused";
  yamlize(Writer, Text, true, Ctx);

  // Push any buffered output into Emitted before inspecting it.
  Stream.flush();
  EXPECT_EQ("\"parameter 'параметр' is unused\"", Emitted);
}
// A scalar containing multi-byte UTF-8 (Cyrillic) text between "/*" and "*/"
// is expected to be emitted wrapped in double quotes with the UTF-8 bytes
// passed through unchanged (no "\x" hex escapes).
TEST(YAMLIO, TestEscapedUTF8) {
  // Sink that collects everything the YAML writer emits.
  std::string Emitted;
  llvm::raw_string_ostream Stream(Emitted);
  Output Writer(Stream, nullptr, 0);
  llvm::yaml::EmptyContext Ctx;

  std::string Text = "/*параметр*/";
  yamlize(Writer, Text, true, Ctx);

  // Push any buffered output into Emitted before inspecting it.
  Stream.flush();
  EXPECT_EQ("\"/*параметр*/\"", Emitted);
}