forked from OSchip/llvm-project
[clang-format] SortIncludes should support "@import" lines in Objective-C
Fixes [[ https://github.com/llvm/llvm-project/issues/38995 | #38995 ]] This is an attempt to modify the regular expression to identify `@import` and `import` alongside the regular `#include`. The challenging part was not to support `@` in addition to `#` but how to handle everything that comes after the `include|import` keywords. Previously everything that wasn't `"` or `<` was consumed. But as you can see in this example from the issue #38995, there is no `"` or `<` following the keyword: ``` @import Foundation; ``` I experimented with a lot of fancy and useful expressions in [this online regex tool](https://regex101.com) only to find out that some things are simply not supported by the regex implementation in LLVM. * For example the beginning `[\t\ ]*` should be replacable by the horizontal whitespace character `\h*` but this will break the `SortIncludesTest.LeadingWhitespace` test. That's why I've chosen to come back to the basic building blocks. The essential change in this patch is the change from this regular expression: ``` ^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]) ~ ~~~~~~~~~~~~~~ ^ ^ | | only support # prefix not @ | only support "" and <> as delimiters no support for C++ modules and ; ending. Also this allows for "> or <" or "" or <> which all seems either off or wrong. ``` to this: ``` ^[\t\ ]*[@#][\t\ ]*(import|include)([^"]*("[^"]+")|[^<]*(<[^>]+>)|[\t\ ]*([^;]+;)) ~~~~ ~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ ^ ^ ^ ^ ^ | | | | | Now support @ and #. Clearly support "" and <> as well as an include name without enclosing characters. Allows for no mixture of "> or <" or empty include names. ``` Here is how I've tested this patch: ``` ninja clang-Format ninja FormatTests ./tools/clang/unittests/Format/FormatTests --gtest_filter=SortIncludesTest* ``` And if that worked I doubled checked that nothing else broke by running all format checks: ``` ./tools/clang/unittests/Format/FormatTests ``` One side effect of this change is it should partially support [C++20 Module](https://en.cppreference.com/w/cpp/language/modules) `import` lines without the optional `export` in front. Adding this can be a change on its own that shouldn't be too hard. I say partially because the `@` or `#` are currently *NOT* optional in the regular expression. I see an opportunity to optimized the matching to exclude `@include` for example. But eventually these should be caught by the compiler, so... With my change, the matching group is not at a fixed position any longer. I decided to choose the last match (group) that is not empty. Reviewed By: HazardyKnusperkeks Differential Revision: https://reviews.llvm.org/D121370
This commit is contained in:
parent
57d17795b9
commit
d46fa023ca
|
@ -129,6 +129,23 @@ private:
|
|||
llvm::Regex IncludeRegex;
|
||||
};
|
||||
|
||||
/// \returns a regex that can match various styles of C++ includes.
|
||||
/// For example:
|
||||
/// \code
|
||||
/// #include <foo.h>
|
||||
/// @import bar;
|
||||
/// #include "bar.h"
|
||||
/// \endcode
|
||||
llvm::Regex getCppIncludeRegex();
|
||||
|
||||
/// \returns the last match in the list of matches that is not empty.
|
||||
llvm::StringRef getIncludeNameFromMatches(
|
||||
const llvm::SmallVectorImpl<llvm::StringRef> &Matches);
|
||||
|
||||
/// \returns the given include name and removes the following symbols from the
|
||||
/// beginning and ending of the include name: " > < ;
|
||||
llvm::StringRef trimInclude(llvm::StringRef IncludeName);
|
||||
|
||||
} // namespace tooling
|
||||
} // namespace clang
|
||||
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
#include "llvm/Support/VirtualFileSystem.h"
|
||||
#include "llvm/Support/YAMLTraits.h"
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
|
@ -2721,13 +2722,6 @@ static void sortCppIncludes(const FormatStyle &Style,
|
|||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
const char CppIncludeRegexPattern[] =
|
||||
R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
|
||||
ArrayRef<tooling::Range> Ranges,
|
||||
StringRef FileName,
|
||||
|
@ -2737,7 +2731,7 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
|
|||
.StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM
|
||||
.Default(0);
|
||||
unsigned SearchFrom = 0;
|
||||
llvm::Regex IncludeRegex(CppIncludeRegexPattern);
|
||||
llvm::Regex IncludeRegex(tooling::getCppIncludeRegex());
|
||||
SmallVector<StringRef, 4> Matches;
|
||||
SmallVector<IncludeDirective, 16> IncludesInBlock;
|
||||
|
||||
|
@ -2793,7 +2787,14 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
|
|||
bool MergeWithNextLine = Trimmed.endswith("\\");
|
||||
if (!FormattingOff && !MergeWithNextLine) {
|
||||
if (IncludeRegex.match(Line, &Matches)) {
|
||||
StringRef IncludeName = Matches[2];
|
||||
StringRef IncludeName = tooling::getIncludeNameFromMatches(Matches);
|
||||
// This addresses https://github.com/llvm/llvm-project/issues/38995
|
||||
bool WithSemicolon = false;
|
||||
if (!IncludeName.startswith("\"") && !IncludeName.startswith("<") &&
|
||||
IncludeName.endswith(";")) {
|
||||
WithSemicolon = true;
|
||||
}
|
||||
|
||||
if (Line.contains("/*") && !Line.contains("*/")) {
|
||||
// #include with a start of a block comment, but without the end.
|
||||
// Need to keep all the lines until the end of the comment together.
|
||||
|
@ -2806,8 +2807,10 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
|
|||
int Category = Categories.getIncludePriority(
|
||||
IncludeName,
|
||||
/*CheckMainHeader=*/!MainIncludeFound && FirstIncludeBlock);
|
||||
int Priority = Categories.getSortIncludePriority(
|
||||
IncludeName, !MainIncludeFound && FirstIncludeBlock);
|
||||
int Priority = WithSemicolon ? std::numeric_limits<int>::max()
|
||||
: Categories.getSortIncludePriority(
|
||||
IncludeName, !MainIncludeFound &&
|
||||
FirstIncludeBlock);
|
||||
if (Category == 0)
|
||||
MainIncludeFound = true;
|
||||
IncludesInBlock.push_back(
|
||||
|
@ -3067,8 +3070,7 @@ namespace {
|
|||
|
||||
inline bool isHeaderInsertion(const tooling::Replacement &Replace) {
|
||||
return Replace.getOffset() == UINT_MAX && Replace.getLength() == 0 &&
|
||||
llvm::Regex(CppIncludeRegexPattern)
|
||||
.match(Replace.getReplacementText());
|
||||
tooling::getCppIncludeRegex().match(Replace.getReplacementText());
|
||||
}
|
||||
|
||||
inline bool isHeaderDeletion(const tooling::Replacement &Replace) {
|
||||
|
@ -3076,7 +3078,7 @@ inline bool isHeaderDeletion(const tooling::Replacement &Replace) {
|
|||
}
|
||||
|
||||
// FIXME: insert empty lines between newly created blocks.
|
||||
tooling::Replacements
|
||||
static tooling::Replacements
|
||||
fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
|
||||
const FormatStyle &Style) {
|
||||
if (!Style.isCpp())
|
||||
|
@ -3108,7 +3110,7 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
|
|||
|
||||
for (const auto &Header : HeadersToDelete) {
|
||||
tooling::Replacements Replaces =
|
||||
Includes.remove(Header.trim("\"<>"), Header.startswith("<"));
|
||||
Includes.remove(tooling::trimInclude(Header), Header.startswith("<"));
|
||||
for (const auto &R : Replaces) {
|
||||
auto Err = Result.add(R);
|
||||
if (Err) {
|
||||
|
@ -3120,7 +3122,7 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
|
|||
}
|
||||
}
|
||||
|
||||
llvm::Regex IncludeRegex = llvm::Regex(CppIncludeRegexPattern);
|
||||
llvm::Regex IncludeRegex = tooling::getCppIncludeRegex();
|
||||
llvm::SmallVector<StringRef, 4> Matches;
|
||||
for (const auto &R : HeaderInsertions) {
|
||||
auto IncludeDirective = R.getReplacementText();
|
||||
|
@ -3128,9 +3130,9 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
|
|||
assert(Matched && "Header insertion replacement must have replacement text "
|
||||
"'#include ...'");
|
||||
(void)Matched;
|
||||
auto IncludeName = Matches[2];
|
||||
auto Replace =
|
||||
Includes.insert(IncludeName.trim("\"<>"), IncludeName.startswith("<"));
|
||||
StringRef IncludeName = tooling::getIncludeNameFromMatches(Matches);
|
||||
auto Replace = Includes.insert(tooling::trimInclude(IncludeName),
|
||||
IncludeName.startswith("<"));
|
||||
if (Replace) {
|
||||
auto Err = Result.add(*Replace);
|
||||
if (Err) {
|
||||
|
|
|
@ -169,13 +169,6 @@ unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code,
|
|||
});
|
||||
}
|
||||
|
||||
inline StringRef trimInclude(StringRef IncludeName) {
|
||||
return IncludeName.trim("\"<>");
|
||||
}
|
||||
|
||||
const char IncludeRegexPattern[] =
|
||||
R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
|
||||
|
||||
// The filename of Path excluding extension.
|
||||
// Used to match implementation with headers, this differs from sys::path::stem:
|
||||
// - in names with multiple dots (foo.cu.cc) it terminates at the *first*
|
||||
|
@ -274,8 +267,7 @@ HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
|
|||
MaxInsertOffset(MinInsertOffset +
|
||||
getMaxHeaderInsertionOffset(
|
||||
FileName, Code.drop_front(MinInsertOffset), Style)),
|
||||
Categories(Style, FileName),
|
||||
IncludeRegex(llvm::Regex(IncludeRegexPattern)) {
|
||||
Categories(Style, FileName), IncludeRegex(getCppIncludeRegex()) {
|
||||
// Add 0 for main header and INT_MAX for headers that are not in any
|
||||
// category.
|
||||
Priorities = {0, INT_MAX};
|
||||
|
@ -290,10 +282,11 @@ HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
|
|||
for (auto Line : Lines) {
|
||||
NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
|
||||
if (IncludeRegex.match(Line, &Matches)) {
|
||||
StringRef IncludeName = tooling::getIncludeNameFromMatches(Matches);
|
||||
// If this is the last line without trailing newline, we need to make
|
||||
// sure we don't delete across the file boundary.
|
||||
addExistingInclude(
|
||||
Include(Matches[2],
|
||||
Include(IncludeName,
|
||||
tooling::Range(
|
||||
Offset, std::min(Line.size() + 1, Code.size() - Offset))),
|
||||
NextLineOffset);
|
||||
|
@ -403,5 +396,25 @@ tooling::Replacements HeaderIncludes::remove(llvm::StringRef IncludeName,
|
|||
return Result;
|
||||
}
|
||||
|
||||
llvm::Regex getCppIncludeRegex() {
|
||||
static const char CppIncludeRegexPattern[] =
|
||||
R"(^[\t\ ]*[@#][\t\ ]*(import|include)([^"]*("[^"]+")|[^<]*(<[^>]+>)|[\t\ ]*([^;]+;)))";
|
||||
return llvm::Regex(CppIncludeRegexPattern);
|
||||
}
|
||||
|
||||
llvm::StringRef getIncludeNameFromMatches(
|
||||
const llvm::SmallVectorImpl<llvm::StringRef> &Matches) {
|
||||
for (auto Match : llvm::reverse(Matches)) {
|
||||
if (!Match.empty())
|
||||
return Match;
|
||||
}
|
||||
llvm_unreachable("No non-empty match group found in list of matches");
|
||||
return llvm::StringRef();
|
||||
}
|
||||
|
||||
llvm::StringRef trimInclude(llvm::StringRef IncludeName) {
|
||||
return IncludeName.trim("\"<>;");
|
||||
}
|
||||
|
||||
} // namespace tooling
|
||||
} // namespace clang
|
||||
|
|
|
@ -458,6 +458,103 @@ TEST_F(SortIncludesTest, HandlesMultilineIncludes) {
|
|||
"#include \"b.h\"\n"));
|
||||
}
|
||||
|
||||
TEST_F(SortIncludesTest, SupportAtImportLines) {
|
||||
// Test from https://github.com/llvm/llvm-project/issues/38995
|
||||
EXPECT_EQ("#import \"a.h\"\n"
|
||||
"#import \"b.h\"\n"
|
||||
"#import \"c.h\"\n"
|
||||
"#import <d/e.h>\n"
|
||||
"@import Foundation;\n",
|
||||
sort("#import \"b.h\"\n"
|
||||
"#import \"c.h\"\n"
|
||||
"#import <d/e.h>\n"
|
||||
"@import Foundation;\n"
|
||||
"#import \"a.h\"\n"));
|
||||
|
||||
// Slightly more complicated test that shows sorting in each priorities still
|
||||
// works.
|
||||
EXPECT_EQ("#import \"a.h\"\n"
|
||||
"#import \"b.h\"\n"
|
||||
"#import \"c.h\"\n"
|
||||
"#import <d/e.h>\n"
|
||||
"@import Base;\n"
|
||||
"@import Foundation;\n"
|
||||
"@import base;\n"
|
||||
"@import foundation;\n",
|
||||
sort("#import \"b.h\"\n"
|
||||
"#import \"c.h\"\n"
|
||||
"@import Base;\n"
|
||||
"#import <d/e.h>\n"
|
||||
"@import foundation;\n"
|
||||
"@import Foundation;\n"
|
||||
"@import base;\n"
|
||||
"#import \"a.h\"\n"));
|
||||
|
||||
// Test that shows main headers in two groups are still found and sorting
|
||||
// still works. The @import's are kept in their respective group but are
|
||||
// put at the end of each group.
|
||||
FmtStyle.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Preserve;
|
||||
EXPECT_EQ("#import \"foo.hpp\"\n"
|
||||
"#import \"b.h\"\n"
|
||||
"#import \"c.h\"\n"
|
||||
"#import <d/e.h>\n"
|
||||
"@import Base;\n"
|
||||
"@import Foundation;\n"
|
||||
"@import foundation;\n"
|
||||
"\n"
|
||||
"#import \"foo.h\"\n"
|
||||
"#include \"foobar\"\n"
|
||||
"#import <f/g.h>\n"
|
||||
"@import AAAA;\n"
|
||||
"@import aaaa;\n",
|
||||
sort("#import \"b.h\"\n"
|
||||
"@import Foundation;\n"
|
||||
"@import foundation;\n"
|
||||
"#import \"c.h\"\n"
|
||||
"#import <d/e.h>\n"
|
||||
"@import Base;\n"
|
||||
"#import \"foo.hpp\"\n"
|
||||
"\n"
|
||||
"@import aaaa;\n"
|
||||
"#import <f/g.h>\n"
|
||||
"@import AAAA;\n"
|
||||
"#include \"foobar\"\n"
|
||||
"#import \"foo.h\"\n",
|
||||
"foo.c", 2));
|
||||
|
||||
// Regrouping and putting @import's in the very last group
|
||||
FmtStyle.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Regroup;
|
||||
EXPECT_EQ("#import \"foo.hpp\"\n"
|
||||
"\n"
|
||||
"#import \"b.h\"\n"
|
||||
"#import \"c.h\"\n"
|
||||
"#import \"foo.h\"\n"
|
||||
"#include \"foobar\"\n"
|
||||
"\n"
|
||||
"#import <d/e.h>\n"
|
||||
"#import <f/g.h>\n"
|
||||
"\n"
|
||||
"@import AAAA;\n"
|
||||
"@import Base;\n"
|
||||
"@import Foundation;\n"
|
||||
"@import aaaa;\n"
|
||||
"@import foundation;\n",
|
||||
sort("#import \"b.h\"\n"
|
||||
"@import Foundation;\n"
|
||||
"@import foundation;\n"
|
||||
"#import \"c.h\"\n"
|
||||
"#import <d/e.h>\n"
|
||||
"@import Base;\n"
|
||||
"#import \"foo.hpp\"\n"
|
||||
"\n"
|
||||
"@import aaaa;\n"
|
||||
"#import <f/g.h>\n"
|
||||
"@import AAAA;\n"
|
||||
"#include \"foobar\"\n"
|
||||
"#import \"foo.h\"\n",
|
||||
"foo.c"));
|
||||
}
|
||||
|
||||
TEST_F(SortIncludesTest, LeavesMainHeaderFirst) {
|
||||
Style.IncludeIsMainRegex = "([-_](test|unittest))?$";
|
||||
EXPECT_EQ("#include \"llvm/a.h\"\n"
|
||||
|
|
Loading…
Reference in New Issue