[Lexer] Fix bug in `makeFileCharRange` called on split tokens.

When the end location of the specified range is a split token,
`makeFileCharRange` does not handle it correctly. This patch adds proper
support for split tokens.

Differential Revision: https://reviews.llvm.org/D105365
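
For context, the sketch below (not part of the patch) shows the call pattern the
fix targets: a `>>` token is split into two `>` tokens via
`Preprocessor::SplitToken`, simulating what the parser does when `>>` closes
nested templates, and a range ending at the split token is then mapped back to a
file character range. The helper name, its parameters, and the surrounding setup
are illustrative assumptions, not code from this commit.

    // Illustrative sketch only; assumes an already-configured Preprocessor,
    // SourceManager, and LangOptions, plus token locations taken from lexed
    // input such as `ty<ty<>>`.
    #include "clang/Basic/LangOptions.h"
    #include "clang/Basic/SourceLocation.h"
    #include "clang/Basic/SourceManager.h"
    #include "clang/Lex/Lexer.h"
    #include "clang/Lex/Preprocessor.h"

    using namespace clang;

    // Hypothetical helper: map a token range ending at a split `>` back to a
    // file character range.
    CharSourceRange fileRangeEndingAtSplitToken(Preprocessor &PP,
                                                const SourceManager &SM,
                                                const LangOptions &LangOpts,
                                                SourceLocation InnerTyLoc,
                                                SourceLocation GtGtLoc) {
      // Split off the first `>` of the `>>` token; the remaining `>` gets a
      // location inside a synthetic expansion created by SplitToken.
      SourceLocation RAngleLoc = PP.SplitToken(GtGtLoc, 1);

      // With this patch, a token range ending at the split token maps to a
      // character range that stops after the first `>` instead of being
      // mishandled.
      return Lexer::makeFileCharRange(
          CharSourceRange::getTokenRange(InnerTyLoc, RAngleLoc), SM, LangOpts);
    }

The new `HandlesSplitTokens` test below exercises this pattern both in plain
code and when the range starts inside a macro expansion.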
Author: Yitzhak Mandelbaum
Date:   2021-07-02 18:53:10 +00:00
Parent: 67002b5f20
Commit: 93dc73b1e0

2 changed files with 81 additions and 5 deletions

clang/lib/Lex/Lexer.cpp

@@ -877,6 +877,14 @@ static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range,
   return CharSourceRange::getCharRange(Begin, End);
 }
 
+// Assumes that `Loc` is in an expansion.
+static bool isInExpansionTokenRange(const SourceLocation Loc,
+                                    const SourceManager &SM) {
+  return SM.getSLocEntry(SM.getFileID(Loc))
+      .getExpansion()
+      .isExpansionTokenRange();
+}
+
 CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
                                          const SourceManager &SM,
                                          const LangOptions &LangOpts) {
@@ -896,10 +904,12 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
   }
 
   if (Begin.isFileID() && End.isMacroID()) {
-    if ((Range.isTokenRange() && !isAtEndOfMacroExpansion(End, SM, LangOpts,
-                                                          &End)) ||
-        (Range.isCharRange() && !isAtStartOfMacroExpansion(End, SM, LangOpts,
-                                                           &End)))
+    if (Range.isTokenRange()) {
+      if (!isAtEndOfMacroExpansion(End, SM, LangOpts, &End))
+        return {};
+      // Use the *original* end, not the expanded one in `End`.
+      Range.setTokenRange(isInExpansionTokenRange(Range.getEnd(), SM));
+    } else if (!isAtStartOfMacroExpansion(End, SM, LangOpts, &End))
       return {};
     Range.setEnd(End);
     return makeRangeFromFileLocs(Range, SM, LangOpts);
@@ -914,6 +924,9 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
                                                          &MacroEnd)))) {
     Range.setBegin(MacroBegin);
     Range.setEnd(MacroEnd);
+    // Use the *original* `End`, not the expanded one in `MacroEnd`.
+    if (Range.isTokenRange())
+      Range.setTokenRange(isInExpansionTokenRange(End, SM));
     return makeRangeFromFileLocs(Range, SM, LangOpts);
   }

clang/unittests/Lex/LexerTest.cpp

@@ -25,6 +25,7 @@
 #include "clang/Lex/PreprocessorOptions.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#include <memory>
 #include <vector>
 
 namespace {
@@ -65,7 +66,7 @@ protected:
 
   std::vector<Token> Lex(StringRef Source) {
     TrivialModuleLoader ModLoader;
-    auto PP = CreatePP(Source, ModLoader);
+    PP = CreatePP(Source, ModLoader);
 
     std::vector<Token> toks;
     while (1) {
@@ -109,6 +110,7 @@ protected:
   LangOptions LangOpts;
   std::shared_ptr<TargetOptions> TargetOpts;
   IntrusiveRefCntPtr<TargetInfo> Target;
+  std::unique_ptr<Preprocessor> PP;
 };
 
 TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
@@ -264,12 +266,14 @@ TEST_F(LexerTest, GetSourceTextExpandsRecursively) {
 
 TEST_F(LexerTest, LexAPI) {
   std::vector<tok::TokenKind> ExpectedTokens;
+  // Line 1 (after the #defines)
   ExpectedTokens.push_back(tok::l_square);
   ExpectedTokens.push_back(tok::identifier);
   ExpectedTokens.push_back(tok::r_square);
   ExpectedTokens.push_back(tok::l_square);
   ExpectedTokens.push_back(tok::identifier);
   ExpectedTokens.push_back(tok::r_square);
+  // Line 2
   ExpectedTokens.push_back(tok::identifier);
   ExpectedTokens.push_back(tok::identifier);
   ExpectedTokens.push_back(tok::identifier);
@@ -357,6 +361,65 @@ TEST_F(LexerTest, LexAPI) {
   EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
 }
 
+TEST_F(LexerTest, HandlesSplitTokens) {
+  std::vector<tok::TokenKind> ExpectedTokens;
+  // Line 1 (after the #defines)
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::less);
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::less);
+  ExpectedTokens.push_back(tok::greatergreater);
+  // Line 2
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::less);
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::less);
+  ExpectedTokens.push_back(tok::greatergreater);
+
+  std::vector<Token> toks = CheckLex("#define TY ty\n"
+                                     "#define RANGLE ty<ty<>>\n"
+                                     "TY<ty<>>\n"
+                                     "RANGLE",
+                                     ExpectedTokens);
+
+  SourceLocation outerTyLoc = toks[0].getLocation();
+  SourceLocation innerTyLoc = toks[2].getLocation();
+  SourceLocation gtgtLoc = toks[4].getLocation();
+  // Split the token to simulate the action of the parser and force creation of
+  // an `ExpansionTokenRange`.
+  SourceLocation rangleLoc = PP->SplitToken(gtgtLoc, 1);
+
+  // Verify that it only captures the first greater-than and not the second
+  // one.
+  CharSourceRange range = Lexer::makeFileCharRange(
+      CharSourceRange::getTokenRange(innerTyLoc, rangleLoc), SourceMgr,
+      LangOpts);
+  EXPECT_TRUE(range.isCharRange());
+  EXPECT_EQ(range.getAsRange(),
+            SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1)));
+
+  // Verify the case where the range begins in a macro expansion.
+  range = Lexer::makeFileCharRange(
+      CharSourceRange::getTokenRange(outerTyLoc, rangleLoc), SourceMgr,
+      LangOpts);
+  EXPECT_TRUE(range.isCharRange());
+  EXPECT_EQ(range.getAsRange(),
+            SourceRange(SourceMgr.getExpansionLoc(outerTyLoc),
+                        gtgtLoc.getLocWithOffset(1)));
+
+  SourceLocation macroInnerTyLoc = toks[7].getLocation();
+  SourceLocation macroGtgtLoc = toks[9].getLocation();
+  // Split the token to simulate the action of the parser and force creation of
+  // an `ExpansionTokenRange`.
+  SourceLocation macroRAngleLoc = PP->SplitToken(macroGtgtLoc, 1);
+
+  // Verify that it fails (because it only captures the first greater-than and
+  // not the second one, so it doesn't span the entire macro expansion).
+  range = Lexer::makeFileCharRange(
+      CharSourceRange::getTokenRange(macroInnerTyLoc, macroRAngleLoc),
+      SourceMgr, LangOpts);
+  EXPECT_TRUE(range.isInvalid());
+}
+
 TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) {
   std::vector<Token> toks =
       Lex("#define helper1 0\n"