[clang-format] [PR49298] Sort includes pass will sort inside raw strings

https://github.com/llvm/llvm-project/issues/48642

clang-format does not respect raw string literals when sorting includes

```
const char *RawStr = R"(
#include <b.h>
#include <a.h>
)";
```

Running clang-format over this code with SortIncludes enabled transforms it to:

```
const char *RawStr = R"(
#include <a.h>
#include <b.h>
)";
```
This change minimizes that impact during include sorting by treating the opening `R"(` and closing `)"` of a raw string literal (including any delimiter) as the equivalent of `// clang-format off` and `// clang-format on`.

Reviewed By: HazardyKnusperkeks, curdeius

Differential Revision: https://reviews.llvm.org/D115168

Fixes #48642
This commit is contained in:
mydeveloperday 2021-12-12 17:00:43 +00:00
parent 77e019c233
commit 05bea533d1
2 changed files with 166 additions and 0 deletions

View File

@ -2586,12 +2586,31 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
bool MainIncludeFound = false;
bool FormattingOff = false;
llvm::Regex RawStringRegex(
"R\"(([\\[A-Za-z0-9_{}#<>%:;.?*+/^&\\$|~!=,'\\-]|])*)\\(");
SmallVector<StringRef, 2> RawStringMatches;
std::string RawStringTermination = ")\"";
for (;;) {
auto Pos = Code.find('\n', SearchFrom);
StringRef Line =
Code.substr(Prev, (Pos != StringRef::npos ? Pos : Code.size()) - Prev);
StringRef Trimmed = Line.trim();
// #includes inside raw string literals need to be ignored.
// or we will sort the contents of the string.
// Skip past until we think we are at the rawstring literal close.
if (RawStringRegex.match(Trimmed, &RawStringMatches)) {
std::string CharSequence = RawStringMatches[1].str();
RawStringTermination = ")" + CharSequence + "\"";
FormattingOff = true;
}
if (Trimmed.contains(RawStringTermination)) {
FormattingOff = false;
}
if (Trimmed == "// clang-format off" || Trimmed == "/* clang-format off */")
FormattingOff = true;
else if (Trimmed == "// clang-format on" ||

View File

@ -1045,6 +1045,153 @@ TEST_F(SortIncludesTest, DisableFormatDisablesIncludeSorting) {
EXPECT_EQ(Unsorted, sort(Unsorted, "input.cpp", 0));
}
// Checks that #include lines appearing inside raw string literals are left
// exactly as written, across a range of raw string delimiters, while
// #include lines outside the literals are still put in order.
TEST_F(SortIncludesTest, DisableRawStringLiteralSorting) {
  // Produces a declaration whose raw string literal, opened and closed with
  // the given delimiter, contains two out-of-order #include lines.
  auto RawLiteralWithIncludes = [](const std::string &Delim) {
    return "const char *t = R\"" + Delim +
           "(\n"
           "#include <b.h>\n"
           "#include <a.h>\n"
           ")" +
           Delim + "\";";
  };

  // No delimiter and short alphabetic delimiters: the input must come back
  // unchanged.
  const char *const PlainDelimiters[] = {"", "x", "xyz"};
  for (const char *Delim : PlainDelimiters) {
    const std::string Code = RawLiteralWithIncludes(Delim);
    EXPECT_EQ(Code, sort(Code, "test.cxx", 0));
  }

  // Include blocks interleaved with raw string literals: the blocks outside
  // the literals get sorted, the literal contents stay in their original
  // order.
  const char *SortedAroundLiterals = "#include <a.h>\n"
                                     "#include <b.h>\n"
                                     "const char *t = R\"(\n"
                                     "#include <b.h>\n"
                                     "#include <a.h>\n"
                                     ")\";\n"
                                     "#include <c.h>\n"
                                     "#include <d.h>\n"
                                     "const char *t = R\"x(\n"
                                     "#include <f.h>\n"
                                     "#include <e.h>\n"
                                     ")x\";\n"
                                     "#include <g.h>\n"
                                     "#include <h.h>\n"
                                     "const char *t = R\"xyz(\n"
                                     "#include <j.h>\n"
                                     "#include <i.h>\n"
                                     ")xyz\";\n"
                                     "#include <k.h>\n"
                                     "#include <l.h>";
  const char *UnsortedAroundLiterals = "#include <b.h>\n"
                                       "#include <a.h>\n"
                                       "const char *t = R\"(\n"
                                       "#include <b.h>\n"
                                       "#include <a.h>\n"
                                       ")\";\n"
                                       "#include <d.h>\n"
                                       "#include <c.h>\n"
                                       "const char *t = R\"x(\n"
                                       "#include <f.h>\n"
                                       "#include <e.h>\n"
                                       ")x\";\n"
                                       "#include <h.h>\n"
                                       "#include <g.h>\n"
                                       "const char *t = R\"xyz(\n"
                                       "#include <j.h>\n"
                                       "#include <i.h>\n"
                                       ")xyz\";\n"
                                       "#include <l.h>\n"
                                       "#include <k.h>";
  EXPECT_EQ(SortedAroundLiterals, sort(UnsortedAroundLiterals, "test.cc", 4));

  // Delimiters mixing letters, digits and punctuation, ending with one long
  // delimiter that packs many punctuation characters together. Again the
  // input must come back unchanged.
  const char *const PunctuatedDelimiters[] = {
      "AMZ029amz",  "-AMZ029amz", "AMZ029amz-",
      "AM|029amz-", "AM[029amz-", "AM]029amz-",
      "AMZ029amz{}+!%*=_:;',.<>|/?#~-$"};
  for (const char *Delim : PunctuatedDelimiters) {
    const std::string Code = RawLiteralWithIncludes(Delim);
    EXPECT_EQ(Code, sort(Code, "test.cxx", 0));
  }
}
} // end namespace
} // end namespace format
} // end namespace clang