[clang-format] Skip UTF8 Byte Order Mark while sorting includes

If a file contains a BOM, then the first instruction (an include or clang-format off) is ignored.

Reviewed By: MyDeveloperDay

Differential Revision: https://reviews.llvm.org/D94201
This commit is contained in:
Rafał Jelonek 2021-01-11 09:28:41 +01:00 committed by Marek Kurdej
parent cddd3faf5d
commit ee27c767bd
2 changed files with 39 additions and 1 deletions

View File

@ -2253,7 +2253,9 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
StringRef FileName,
tooling::Replacements &Replaces,
unsigned *Cursor) {
unsigned Prev = 0;
unsigned Prev = llvm::StringSwitch<size_t>(Code)
.StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM
.Default(0);
unsigned SearchFrom = 0;
llvm::Regex IncludeRegex(CppIncludeRegexPattern);
SmallVector<StringRef, 4> Matches;

View File

@ -879,6 +879,42 @@ TEST_F(SortIncludesTest, DoNotRegroupGroupsInGoogleObjCStyle) {
"#include \"a.h\""));
}
// With IncludeBlocks set to merge, a leading UTF-8 byte order mark must stay
// at the very start of the output while the two include groups are collapsed
// into a single sorted block.
TEST_F(SortIncludesTest, skipUTF8ByteOrderMarkMerge) {
  Style.IncludeBlocks = Style.IBS_Merge;

  // The BOM is kept in its own literal so the hex escape visibly ends before
  // the '#'; adjacent literals concatenate to the same bytes.
  const std::string Unsorted = "\xEF\xBB\xBF"
                               "#include \"d.h\"\r\n"
                               "#include \"b.h\"\r\n"
                               "\r\n"
                               "#include \"c.h\"\r\n"
                               "#include \"a.h\"\r\n"
                               "#include \"e.h\"\r\n";

  // "e.h" is expected first (presumably because it matches the file name
  // "e.cpp" passed to sort()); the blank separator line is gone.
  const std::string Sorted = "\xEF\xBB\xBF"
                             "#include \"e.h\"\r\n"
                             "#include \"a.h\"\r\n"
                             "#include \"b.h\"\r\n"
                             "#include \"c.h\"\r\n"
                             "#include \"d.h\"\r\n";

  EXPECT_EQ(Sorted, sort(Unsorted, "e.cpp", 1));
}
// With IncludeBlocks set to preserve, a leading UTF-8 byte order mark must
// stay at the very start of the output while each include group is sorted on
// its own and the blank line between the groups is kept.
TEST_F(SortIncludesTest, skipUTF8ByteOrderMarkPreserve) {
  Style.IncludeBlocks = Style.IBS_Preserve;

  // The BOM is kept in its own literal so the hex escape visibly ends before
  // the '#'; adjacent literals concatenate to the same bytes.
  const std::string Unsorted = "\xEF\xBB\xBF"
                               "#include \"d.h\"\r\n"
                               "#include \"b.h\"\r\n"
                               "\r\n"
                               "#include \"c.h\"\r\n"
                               "#include \"a.h\"\r\n"
                               "#include \"e.h\"\r\n";

  // First group becomes b, d; second group becomes a, c, e.
  const std::string Sorted = "\xEF\xBB\xBF"
                             "#include \"b.h\"\r\n"
                             "#include \"d.h\"\r\n"
                             "\r\n"
                             "#include \"a.h\"\r\n"
                             "#include \"c.h\"\r\n"
                             "#include \"e.h\"\r\n";

  EXPECT_EQ(Sorted, sort(Unsorted, "e.cpp", 2));
}
} // end namespace
} // end namespace format
} // end namespace clang