[Syntax] expose API for expansions overlapping a spelled token range.

Summary:
This allows efficiently accessing all expansions (without iterating over each
token and searching), and also identifying tokens within a range that are
affected by the preprocessor (which is how clangd will use it).

Subscribers: ilya-biryukov, kadircet, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D84009
This commit is contained in:
Sam McCall 2020-07-17 11:35:04 +02:00
parent 68a1cbe11a
commit f0ab336e74
3 changed files with 85 additions and 35 deletions

View File

@ -275,6 +275,10 @@ public:
/// macro expands to.
llvm::Optional<Expansion>
expansionStartingAt(const syntax::Token *Spelled) const;
/// Returns all expansions (partially) expanded from the specified tokens.
/// These are the expansions whose Spelled range intersects \p Spelled.
std::vector<Expansion>
expansionsOverlapping(llvm::ArrayRef<syntax::Token> Spelled) const;
/// Lexed tokens of a file before preprocessing. E.g. for the following input
/// #define DECL(name) int name = 10
@ -352,6 +356,12 @@ private:
mappingStartingBeforeSpelled(const MarkedFile &F,
const syntax::Token *Spelled);
/// Convert a private Mapping to a public Expansion.
Expansion makeExpansion(const MarkedFile &, const Mapping &) const;
/// Returns the file that the Spelled tokens are taken from.
/// Asserts that they are non-empty, from a tracked file, and in-bounds.
const MarkedFile &fileForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const;
/// Token stream produced after preprocessing, conceptually this captures the
/// same stream as 'clang -E' (excluding the preprocessor directives like
/// #file, etc.).

View File

@ -249,22 +249,7 @@ llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
if (Spelled.empty())
return {};
assert(Spelled.front().location().isFileID());
auto FID = sourceManager().getFileID(Spelled.front().location());
auto It = Files.find(FID);
assert(It != Files.end());
const MarkedFile &File = It->second;
// `Spelled` must be a subrange of `File.SpelledTokens`.
assert(File.SpelledTokens.data() <= Spelled.data());
assert(&Spelled.back() <=
File.SpelledTokens.data() + File.SpelledTokens.size());
#ifndef NDEBUG
auto T1 = Spelled.back().location();
auto T2 = File.SpelledTokens.back().location();
assert(T1 == T2 || sourceManager().isBeforeInTranslationUnit(T1, T2));
#endif
const auto &File = fileForSpelled(Spelled);
auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front());
unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data();
@ -395,16 +380,39 @@ TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
: LastSpelled + 1);
}
/// Builds the public Expansion corresponding to the private mapping \p M.
/// The spelled range is taken from \p F's spelled tokens and the expanded
/// range from this buffer's ExpandedTokens, using the begin/end indices
/// stored in \p M.
TokenBuffer::Expansion TokenBuffer::makeExpansion(const MarkedFile &F,
                                                  const Mapping &M) const {
  const auto *SpelledBase = F.SpelledTokens.data();
  const auto *ExpandedBase = ExpandedTokens.data();

  Expansion Result;
  Result.Spelled = llvm::makeArrayRef(SpelledBase + M.BeginSpelled,
                                      SpelledBase + M.EndSpelled);
  Result.Expanded = llvm::makeArrayRef(ExpandedBase + M.BeginExpanded,
                                       ExpandedBase + M.EndExpanded);
  return Result;
}
/// Returns the MarkedFile whose spelled tokens contain \p Spelled.
/// Preconditions are checked with assertions only: \p Spelled must be
/// non-empty, its first token must have a file location, that file must be
/// present in Files, and the range must lie within the file's SpelledTokens.
const TokenBuffer::MarkedFile &
TokenBuffer::fileForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
  assert(!Spelled.empty());
  const auto FrontLoc = Spelled.front().location();
  assert(FrontLoc.isFileID() && "not a spelled token");

  auto Entry = Files.find(SourceMgr->getFileID(FrontLoc));
  assert(Entry != Files.end() && "file not tracked by token buffer");
  const auto &Owner = Entry->second;

  // `Spelled` has to be a subrange of the file's spelled token storage.
  assert(Spelled.data() >= Owner.SpelledTokens.data() &&
         Spelled.end() <=
             (Owner.SpelledTokens.data() + Owner.SpelledTokens.size()) &&
         "Tokens not in spelled range");
#ifndef NDEBUG
  // Extra check on source order: the last token of the range must not come
  // after the last spelled token of the file.
  auto RangeLastLoc = Spelled.back().location();
  auto FileLastLoc = Owner.SpelledTokens.back().location();
  assert(RangeLastLoc == FileLastLoc ||
         sourceManager().isBeforeInTranslationUnit(RangeLastLoc, FileLastLoc));
#endif
  return Owner;
}
llvm::Optional<TokenBuffer::Expansion>
TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const {
assert(Spelled);
assert(Spelled->location().isFileID() && "not a spelled token");
auto FileIt = Files.find(SourceMgr->getFileID(Spelled->location()));
assert(FileIt != Files.end() && "file not tracked by token buffer");
auto &File = FileIt->second;
assert(File.SpelledTokens.data() <= Spelled &&
Spelled < (File.SpelledTokens.data() + File.SpelledTokens.size()));
const auto &File = fileForSpelled(*Spelled);
unsigned SpelledIndex = Spelled - File.SpelledTokens.data();
auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
@ -412,14 +420,27 @@ TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const {
});
if (M == File.Mappings.end() || M->BeginSpelled != SpelledIndex)
return llvm::None;
Expansion E;
E.Spelled = llvm::makeArrayRef(File.SpelledTokens.data() + M->BeginSpelled,
File.SpelledTokens.data() + M->EndSpelled);
E.Expanded = llvm::makeArrayRef(ExpandedTokens.data() + M->BeginExpanded,
ExpandedTokens.data() + M->EndExpanded);
return E;
return makeExpansion(File, *M);
}
/// Collects every expansion whose spelled range intersects \p Spelled.
/// An empty \p Spelled yields an empty result; otherwise the tokens must
/// satisfy the preconditions asserted by fileForSpelled().
std::vector<TokenBuffer::Expansion> TokenBuffer::expansionsOverlapping(
    llvm::ArrayRef<syntax::Token> Spelled) const {
  std::vector<TokenBuffer::Expansion> Result;
  if (Spelled.empty())
    return Result;

  const auto &File = fileForSpelled(Spelled);
  const auto *FileTokens = File.SpelledTokens.data();
  // Express the query as a half-open [QueryBegin, QueryEnd) range of indices
  // into the file's spelled tokens.
  const unsigned QueryBegin = Spelled.begin() - FileTokens;
  const unsigned QueryEnd = Spelled.end() - FileTokens;

  // Skip the mappings that end at or before the query start. The binary
  // search relies on File.Mappings being ordered by spelled position.
  auto Current =
      llvm::partition_point(File.Mappings, [&](const Mapping &Candidate) {
        return Candidate.EndSpelled <= QueryBegin;
      });

  // From there on, take mappings until one starts at or past the query end.
  const auto MappingsEnd = File.Mappings.end();
  while (Current != MappingsEnd && Current->BeginSpelled < QueryEnd) {
    Result.push_back(makeExpansion(File, *Current));
    ++Current;
  }
  return Result;
}
llvm::ArrayRef<syntax::Token>
syntax::spelledTokensTouching(SourceLocation Loc,
llvm::ArrayRef<syntax::Token> Tokens) {

View File

@ -53,6 +53,7 @@ using namespace clang;
using namespace clang::syntax;
using llvm::ValueIs;
using ::testing::_;
using ::testing::AllOf;
using ::testing::Contains;
using ::testing::ElementsAre;
@ -755,7 +756,7 @@ TEST_F(TokenBufferTest, ExpandedTokensForRange) {
EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty());
}
TEST_F(TokenBufferTest, ExpansionStartingAt) {
TEST_F(TokenBufferTest, ExpansionsOverlapping) {
// Object-like macro expansions.
recordTokens(R"cpp(
#define FOO 3+4
@ -763,17 +764,25 @@ TEST_F(TokenBufferTest, ExpansionStartingAt) {
int b = FOO 2;
)cpp");
llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1").drop_back();
llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1");
EXPECT_THAT(
Buffer.expansionStartingAt(Foo1.data()),
ValueIs(IsExpansion(SameRange(Foo1),
ValueIs(IsExpansion(SameRange(Foo1.drop_back()),
SameRange(findExpanded("3 + 4 1").drop_back()))));
EXPECT_THAT(
Buffer.expansionsOverlapping(Foo1),
ElementsAre(IsExpansion(SameRange(Foo1.drop_back()),
SameRange(findExpanded("3 + 4 1").drop_back()))));
llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2").drop_back();
llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2");
EXPECT_THAT(
Buffer.expansionStartingAt(Foo2.data()),
ValueIs(IsExpansion(SameRange(Foo2),
ValueIs(IsExpansion(SameRange(Foo2.drop_back()),
SameRange(findExpanded("3 + 4 2").drop_back()))));
EXPECT_THAT(Buffer.expansionsOverlapping(
llvm::makeArrayRef(Foo1.begin(), Foo2.end())),
ElementsAre(IsExpansion(SameRange(Foo1.drop_back()), _),
IsExpansion(SameRange(Foo2.drop_back()), _)));
// Function-like macro expansions.
recordTokens(R"cpp(
@ -798,6 +807,11 @@ TEST_F(TokenBufferTest, ExpansionStartingAt) {
for (const auto &T : ID2.drop_front())
EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
EXPECT_THAT(Buffer.expansionsOverlapping(llvm::makeArrayRef(
findSpelled("1 + 2").data(), findSpelled("4").data())),
ElementsAre(IsExpansion(SameRange(ID1), _),
IsExpansion(SameRange(ID2), _)));
// PP directives.
recordTokens(R"cpp(
#define FOO 1
@ -823,6 +837,11 @@ int b = 1;
// Only the first spelled token should be found.
for (const auto &T : PragmaOnce.drop_front())
EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
EXPECT_THAT(
Buffer.expansionsOverlapping(findSpelled("FOO ; # pragma")),
ElementsAre(IsExpansion(SameRange(findSpelled("FOO ;").drop_back()), _),
IsExpansion(SameRange(PragmaOnce), _)));
}
TEST_F(TokenBufferTest, TokensToFileRange) {