[Lexer] Improve Lexer::getSourceText() when the given range deals with function macro arguments.

This is a modified version of a patch by Manuel Klimek.

llvm-svn: 182055
This commit is contained in:
Argyrios Kyrtzidis 2013-05-16 21:37:39 +00:00
parent 0835ca12ef
commit 065d720c31
4 changed files with 392 additions and 99 deletions

View File

@ -1161,6 +1161,22 @@ public:
/// expansion but not the expansion of an argument to a function-like macro.
bool isMacroBodyExpansion(SourceLocation Loc) const;
/// \brief Returns true if the given MacroID location points at the beginning
/// of the immediate macro expansion.
///
/// \param MacroBegin If non-null and function returns true, it is set to the
/// begin location of the immediate macro expansion.
bool isAtStartOfImmediateMacroExpansion(SourceLocation Loc,
SourceLocation *MacroBegin = 0) const;
/// \brief Returns true if the given MacroID location points at the character
/// end of the immediate macro expansion.
///
/// \param MacroEnd If non-null and function returns true, it is set to the
/// character end location of the immediate macro expansion.
bool isAtEndOfImmediateMacroExpansion(SourceLocation Loc,
SourceLocation *MacroEnd = 0) const;
/// \brief Returns true if \p Loc is inside the [\p Start, +\p Length)
/// chunk of the source location address space.
///
@ -1570,6 +1586,14 @@ private:
return SLocOffset < getSLocEntryByID(FID.ID+1).getOffset();
}
/// \brief Returns the previous in-order FileID or an invalid FileID if there
/// is no previous one.
FileID getPreviousFileID(FileID FID) const;
/// \brief Returns the next in-order FileID or an invalid FileID if there is
/// no next one.
FileID getNextFileID(FileID FID) const;
/// \brief Create a new fileID for the specified ContentCache and
/// include position.
///

View File

@ -536,6 +536,43 @@ SourceManager::getFakeContentCacheForRecovery() const {
return FakeContentCacheForRecovery;
}
/// \brief Returns the FileID that immediately precedes \p FID in ID order, or
/// an invalid FileID when \p FID is invalid or has no predecessor.
FileID SourceManager::getPreviousFileID(FileID FID) const {
  if (FID.isInvalid())
    return FileID();

  const int CurID = FID.ID;
  const int PrevID = CurID - 1;

  // ID -1 is never given a predecessor.
  if (CurID == -1)
    return FileID();

  if (CurID > 0) {
    // Stepping back from ID 1 would land on ID 0, which is rejected.
    if (PrevID == 0)
      return FileID();
    return FileID::get(PrevID);
  }

  // Remaining IDs are checked against the loaded entry table: the predecessor
  // is only returned when its slot index lies inside that table.
  const unsigned PrevSlot = unsigned(-PrevID - 2);
  if (PrevSlot >= LoadedSLocEntryTable.size())
    return FileID();

  return FileID::get(PrevID);
}
/// \brief Returns the FileID that immediately follows \p FID in ID order, or
/// an invalid FileID when \p FID is invalid or has no successor.
FileID SourceManager::getNextFileID(FileID FID) const {
  if (FID.isInvalid())
    return FileID();

  const int NextID = FID.ID + 1;

  // Positive IDs are bounded by the number of local entries; for the other
  // IDs the successor must stay below -1.
  const bool HasNoSuccessor = FID.ID > 0
                                  ? unsigned(NextID) >= local_sloc_entry_size()
                                  : NextID >= -1;

  return HasNoSuccessor ? FileID() : FileID::get(NextID);
}
//===----------------------------------------------------------------------===//
// Methods to create new FileID's and macro expansions.
//===----------------------------------------------------------------------===//
@ -998,6 +1035,77 @@ bool SourceManager::isMacroBodyExpansion(SourceLocation Loc) const {
return Expansion.isMacroBodyExpansion();
}
/// \brief Checks whether the macro location \p Loc is the first location of
/// its immediate macro expansion.
///
/// \param MacroBegin If non-null and the result is true, receives the start
/// location of the immediate expansion.
bool SourceManager::isAtStartOfImmediateMacroExpansion(
    SourceLocation Loc, SourceLocation *MacroBegin) const {
  assert(Loc.isValid() && Loc.isMacroID() && "Expected a valid macro loc");

  const std::pair<FileID, unsigned> Decomposed = getDecomposedLoc(Loc);
  const FileID CurFID = Decomposed.first;

  // A non-zero offset means Loc is somewhere after the start of the range.
  if (Decomposed.second != 0)
    return false;

  bool Invalid = false;
  const SrcMgr::ExpansionInfo &Info =
      getSLocEntry(CurFID, &Invalid).getExpansion();
  if (Invalid)
    return false;

  const SourceLocation ExpansionStart = Info.getExpansionLocStart();

  if (Info.isMacroArgExpansion()) {
    // A macro argument may be spread over several consecutive FileIDs. If the
    // entry just before this one shares the same expansion start, Loc is in
    // the middle of the argument rather than at its beginning.
    const FileID PrecedingFID = getPreviousFileID(CurFID);
    if (!PrecedingFID.isInvalid()) {
      const SrcMgr::SLocEntry &Preceding = getSLocEntry(PrecedingFID, &Invalid);
      if (Invalid)
        return false;

      const bool SameExpansion =
          Preceding.isExpansion() &&
          Preceding.getExpansion().getExpansionLocStart() == ExpansionStart;
      if (SameExpansion)
        return false;
    }
  }

  if (MacroBegin)
    *MacroBegin = ExpansionStart;
  return true;
}
/// \brief Checks whether the macro location \p Loc is the character end of
/// its immediate macro expansion.
///
/// \param MacroEnd If non-null and the result is true, receives the end
/// location of the immediate expansion.
bool SourceManager::isAtEndOfImmediateMacroExpansion(
    SourceLocation Loc, SourceLocation *MacroEnd) const {
  assert(Loc.isValid() && Loc.isMacroID() && "Expected a valid macro loc");

  const FileID CurFID = getFileID(Loc);

  // If the following offset still belongs to this FileID, Loc cannot be the
  // end of the expansion range.
  if (isInFileID(Loc.getLocWithOffset(1), CurFID))
    return false;

  bool Invalid = false;
  const SrcMgr::ExpansionInfo &Info =
      getSLocEntry(CurFID, &Invalid).getExpansion();
  if (Invalid)
    return false;

  if (Info.isMacroArgExpansion()) {
    // A macro argument may be spread over several consecutive FileIDs. If the
    // entry right after this one shares the same expansion start, the
    // argument continues and Loc is not at its end.
    const FileID FollowingFID = getNextFileID(CurFID);
    if (!FollowingFID.isInvalid()) {
      const SrcMgr::SLocEntry &Following = getSLocEntry(FollowingFID, &Invalid);
      if (Invalid)
        return false;

      const bool SameExpansion =
          Following.isExpansion() &&
          Following.getExpansion().getExpansionLocStart() ==
              Info.getExpansionLocStart();
      if (SameExpansion)
        return false;
    }
  }

  if (MacroEnd)
    *MacroEnd = Info.getExpansionLocEnd();
  return true;
}
//===----------------------------------------------------------------------===//
// Queries about the code at a SourceLocation.

View File

@ -798,14 +798,10 @@ bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc,
SourceLocation *MacroBegin) {
assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc");
std::pair<FileID, unsigned> infoLoc = SM.getDecomposedLoc(loc);
// FIXME: If the token comes from the macro token paste operator ('##')
// this function will always return false;
if (infoLoc.second > 0)
return false; // Does not point at the start of token.
SourceLocation expansionLoc;
if (!SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc))
return false;
SourceLocation expansionLoc =
SM.getSLocEntry(infoLoc.first).getExpansion().getExpansionLocStart();
if (expansionLoc.isFileID()) {
// No other macro expansions, this is the first.
if (MacroBegin)
@ -829,16 +825,11 @@ bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc,
if (tokLen == 0)
return false;
FileID FID = SM.getFileID(loc);
SourceLocation afterLoc = loc.getLocWithOffset(tokLen+1);
if (SM.isInFileID(afterLoc, FID))
return false; // Still in the same FileID, does not point to the last token.
SourceLocation afterLoc = loc.getLocWithOffset(tokLen);
SourceLocation expansionLoc;
if (!SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc))
return false;
// FIXME: If the token comes from the macro token paste operator ('##')
// or the stringify operator ('#') this function will always return false;
SourceLocation expansionLoc =
SM.getSLocEntry(FID).getExpansion().getExpansionLocEnd();
if (expansionLoc.isFileID()) {
// No other macro expansions.
if (MacroEnd)
@ -916,25 +907,25 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
return makeRangeFromFileLocs(Range, SM, LangOpts);
}
FileID FID;
unsigned BeginOffs;
llvm::tie(FID, BeginOffs) = SM.getDecomposedLoc(Begin);
if (FID.isInvalid())
bool Invalid = false;
const SrcMgr::SLocEntry &BeginEntry = SM.getSLocEntry(SM.getFileID(Begin),
&Invalid);
if (Invalid)
return CharSourceRange();
unsigned EndOffs;
if (!SM.isInFileID(End, FID, &EndOffs) ||
BeginOffs > EndOffs)
return CharSourceRange();
if (BeginEntry.getExpansion().isMacroArgExpansion()) {
const SrcMgr::SLocEntry &EndEntry = SM.getSLocEntry(SM.getFileID(End),
&Invalid);
if (Invalid)
return CharSourceRange();
const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID);
const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();
if (Expansion.isMacroArgExpansion() &&
Expansion.getSpellingLoc().isFileID()) {
SourceLocation SpellLoc = Expansion.getSpellingLoc();
Range.setBegin(SpellLoc.getLocWithOffset(BeginOffs));
Range.setEnd(SpellLoc.getLocWithOffset(EndOffs));
return makeRangeFromFileLocs(Range, SM, LangOpts);
if (EndEntry.getExpansion().isMacroArgExpansion() &&
BeginEntry.getExpansion().getExpansionLocStart() ==
EndEntry.getExpansion().getExpansionLocStart()) {
Range.setBegin(SM.getImmediateSpellingLoc(Begin));
Range.setEnd(SM.getImmediateSpellingLoc(End));
return makeFileCharRange(Range, SM, LangOpts);
}
}
return CharSourceRange();

View File

@ -28,30 +28,6 @@ using namespace clang;
namespace {
// The test fixture.
// Shared gtest fixture: holds the file manager, diagnostics engine, source
// manager, language options and target info used by the lexer tests.
class LexerTest : public ::testing::Test {
protected:
// Initializers are listed in member declaration order, so every object is
// built after the members it takes as constructor arguments.
LexerTest()
: FileMgr(FileMgrOpts),
DiagID(new DiagnosticIDs()),
Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
SourceMgr(Diags, FileMgr),
TargetOpts(new TargetOptions)
{
// All tests run against this fixed target triple.
TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
Target = TargetInfo::CreateTargetInfo(Diags, &*TargetOpts);
}
// NOTE: declaration order below is significant; it is the construction order.
FileSystemOptions FileMgrOpts;
FileManager FileMgr;
IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
DiagnosticsEngine Diags;
SourceManager SourceMgr;
LangOptions LangOpts;
IntrusiveRefCntPtr<TargetOptions> TargetOpts;
IntrusiveRefCntPtr<TargetInfo> Target;
};
class VoidModuleLoader : public ModuleLoader {
virtual ModuleLoadResult loadModule(SourceLocation ImportLoc,
ModuleIdPath Path,
@ -66,51 +42,245 @@ class VoidModuleLoader : public ModuleLoader {
bool Complain) { }
};
TEST_F(LexerTest, LexAPI) {
const char *source =
"#define M(x) [x]\n"
"#define N(x) x\n"
"#define INN(x) x\n"
"#define NOF1 INN(val)\n"
"#define NOF2 val\n"
"M(foo) N([bar])\n"
"N(INN(val)) N(NOF1) N(NOF2) N(val)";
MemoryBuffer *buf = MemoryBuffer::getMemBuffer(source);
(void)SourceMgr.createMainFileIDForMemBuffer(buf);
VoidModuleLoader ModLoader;
HeaderSearch HeaderInfo(new HeaderSearchOptions, FileMgr, Diags, LangOpts,
Target.getPtr());
Preprocessor PP(new PreprocessorOptions(), Diags, LangOpts, Target.getPtr(),
SourceMgr, HeaderInfo, ModLoader,
/*IILookup =*/ 0,
/*OwnsHeaderSearch =*/false,
/*DelayInitialization =*/ false);
PP.EnterMainSourceFile();
std::vector<Token> toks;
while (1) {
Token tok;
PP.Lex(tok);
if (tok.is(tok::eof))
break;
toks.push_back(tok);
// The test fixture.
class LexerTest : public ::testing::Test {
protected:
// Builds the state shared by every test: file manager, diagnostics engine
// (with an ignoring consumer), source manager and target options.
LexerTest()
: FileMgr(FileMgrOpts),
DiagID(new DiagnosticIDs()),
Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
SourceMgr(Diags, FileMgr),
TargetOpts(new TargetOptions)
{
// All tests run against this fixed target triple.
TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
Target = TargetInfo::CreateTargetInfo(Diags, &*TargetOpts);
}
// Make sure we got the tokens that we expected.
ASSERT_EQ(10U, toks.size());
ASSERT_EQ(tok::l_square, toks[0].getKind());
ASSERT_EQ(tok::identifier, toks[1].getKind());
ASSERT_EQ(tok::r_square, toks[2].getKind());
ASSERT_EQ(tok::l_square, toks[3].getKind());
ASSERT_EQ(tok::identifier, toks[4].getKind());
ASSERT_EQ(tok::r_square, toks[5].getKind());
ASSERT_EQ(tok::identifier, toks[6].getKind());
ASSERT_EQ(tok::identifier, toks[7].getKind());
ASSERT_EQ(tok::identifier, toks[8].getKind());
ASSERT_EQ(tok::identifier, toks[9].getKind());
/// \brief Preprocesses \p Source as the main file and returns every token
/// lexed before end-of-file.
///
/// The token kinds are compared against \p ExpectedTokens using non-fatal
/// expectations, so a mismatch is reported but the lexed tokens are still
/// returned to the caller.
///
/// \param Source Text to register as the main file buffer.
/// \param ExpectedTokens Token kinds the preprocessed output should have, in
/// order.
std::vector<Token> CheckLex(StringRef Source,
                            ArrayRef<tok::TokenKind> ExpectedTokens) {
  MemoryBuffer *buf = MemoryBuffer::getMemBuffer(Source);
  (void) SourceMgr.createMainFileIDForMemBuffer(buf);

  VoidModuleLoader ModLoader;
  HeaderSearch HeaderInfo(new HeaderSearchOptions, FileMgr, Diags, LangOpts,
                          Target.getPtr());
  Preprocessor PP(new PreprocessorOptions(), Diags, LangOpts, Target.getPtr(),
                  SourceMgr, HeaderInfo, ModLoader, /*IILookup =*/ 0,
                  /*OwnsHeaderSearch =*/ false,
                  /*DelayInitialization =*/ false);
  PP.EnterMainSourceFile();

  // Drain the preprocessor; the eof token itself is not recorded.
  std::vector<Token> toks;
  while (1) {
    Token tok;
    PP.Lex(tok);
    if (tok.is(tok::eof))
      break;
    toks.push_back(tok);
  }

  EXPECT_EQ(ExpectedTokens.size(), toks.size());
  // The size expectation above is non-fatal, so execution continues even when
  // fewer tokens were lexed than expected. Only compare positions present in
  // both sequences; indexing toks past its end would be undefined behavior.
  for (unsigned i = 0, e = ExpectedTokens.size();
       i != e && i < toks.size(); ++i) {
    EXPECT_EQ(ExpectedTokens[i], toks[i].getKind());
  }

  return toks;
}
/// \brief Returns the source text for the token range from \p Begin to
/// \p End, or the literal "<INVALID>" when Lexer::getSourceText() flags the
/// range as invalid.
std::string getSourceText(Token Begin, Token End) {
  // Start from a defined value so the check below never reads an
  // indeterminate bool if the callee leaves the flag untouched.
  bool Invalid = false;
  StringRef Str =
      Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange(
                               Begin.getLocation(), End.getLocation())),
                           SourceMgr, LangOpts, &Invalid);
  if (Invalid)
    return "<INVALID>";
  return Str;
}
FileSystemOptions FileMgrOpts;
FileManager FileMgr;
IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
DiagnosticsEngine Diags;
SourceManager SourceMgr;
LangOptions LangOpts;
IntrusiveRefCntPtr<TargetOptions> TargetOpts;
IntrusiveRefCntPtr<TargetInfo> Target;
};
TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
  // Expected preprocessed stream: identifier '(' identifier ')'.
  const tok::TokenKind Expected[] = {
    tok::identifier, tok::l_paren, tok::identifier, tok::r_paren
  };
  const char *Source = "#define M(x) x\n"
                       "M(f(M(i)))";
  std::vector<Token> Lexed = CheckLex(Source, Expected);

  // The text for the third token alone should be the whole inner invocation.
  EXPECT_EQ("M(i)", getSourceText(Lexed[2], Lexed[2]));
}
TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) {
  // Expected preprocessed stream: two identifiers.
  const tok::TokenKind Expected[] = { tok::identifier, tok::identifier };
  const char *Source = "#define M(x) x\n"
                       "M(M(i) c)";
  std::vector<Token> Lexed = CheckLex(Source, Expected);

  // The text for the first token alone should be the inner invocation.
  EXPECT_EQ("M(i)", getSourceText(Lexed[0], Lexed[0]));
}
TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) {
  // Expected preprocessed stream: three identifiers.
  const tok::TokenKind Expected[] = {
    tok::identifier, tok::identifier, tok::identifier
  };
  const char *Source = "#define M(x) x\n"
                       "M(c c M(i))";
  std::vector<Token> Lexed = CheckLex(Source, Expected);

  // Range covering the second and third tokens.
  EXPECT_EQ("c M(i)", getSourceText(Lexed[1], Lexed[2]));
}
TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) {
  // Expected preprocessed stream: three identifiers.
  const tok::TokenKind Expected[] = {
    tok::identifier, tok::identifier, tok::identifier
  };
  const char *Source = "#define M(x) x\n"
                       "M(M(i) c c)";
  std::vector<Token> Lexed = CheckLex(Source, Expected);

  // Range covering the first and second tokens.
  EXPECT_EQ("M(i) c", getSourceText(Lexed[0], Lexed[1]));
}
TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) {
  // Expected preprocessed stream: four identifiers.
  const tok::TokenKind Expected[] = {
    tok::identifier, tok::identifier, tok::identifier, tok::identifier
  };
  const char *Source = "#define M(x) x\n"
                       "M(c M(i)) M(M(i) c)";
  std::vector<Token> Lexed = CheckLex(Source, Expected);

  // The second and third tokens belong to two separate top-level M(...)
  // invocations, so no source text is expected for that range.
  EXPECT_EQ("<INVALID>", getSourceText(Lexed[1], Lexed[2]));
}
TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) {
  // Expected preprocessed stream: identifier '(' identifier ')'.
  const tok::TokenKind Expected[] = {
    tok::identifier, tok::l_paren, tok::identifier, tok::r_paren
  };
  const char *Source = "#define M(x) x\n"
                       "#define C(x) M(x##c)\n"
                       "M(f(C(i)))";
  std::vector<Token> Lexed = CheckLex(Source, Expected);

  // The third token comes out of C, whose body uses the '##' paste operator.
  EXPECT_EQ("C(i)", getSourceText(Lexed[2], Lexed[2]));
}
TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) {
  // Expected preprocessed stream: identifier '(' identifier ')'.
  const tok::TokenKind Expected[] = {
    tok::identifier, tok::l_paren, tok::identifier, tok::r_paren
  };
  const char *Source = "#define M(x) x\n"
                       "f(M(M(i)))";
  std::vector<Token> Lexed = CheckLex(Source, Expected);

  // The text should widen through both nested M invocations.
  EXPECT_EQ("M(M(i))", getSourceText(Lexed[2], Lexed[2]));
}
TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) {
  // Expected preprocessed stream: identifier '(' identifier ')'.
  const tok::TokenKind Expected[] = {
    tok::identifier, tok::l_paren, tok::identifier, tok::r_paren
  };
  const char *Source = "#define M(x) x\n"
                       "M(f(i))";
  std::vector<Token> Lexed = CheckLex(Source, Expected);

  // The third token sits in the middle of the argument; only its own
  // spelling is expected back.
  EXPECT_EQ("i", getSourceText(Lexed[2], Lexed[2]));
}
TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) {
  // Expected preprocessed stream: identifier '(' identifier ')'.
  const tok::TokenKind Expected[] = {
    tok::identifier, tok::l_paren, tok::identifier, tok::r_paren
  };
  const char *Source = "#define M(x) x\n"
                       "#define C(x) x\n"
                       "f(C(M(i)))";
  std::vector<Token> Lexed = CheckLex(Source, Expected);

  // The text should widen through M and then through the enclosing C.
  EXPECT_EQ("C(M(i))", getSourceText(Lexed[2], Lexed[2]));
}
TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) {
  // Expected preprocessed stream: identifier '(' identifier identifier ')'.
  const tok::TokenKind Expected[] = {
    tok::identifier, tok::l_paren, tok::identifier, tok::identifier,
    tok::r_paren
  };
  const char *Source = "#define M(x) x\n"
                       "#define C(x) c x\n"
                       "f(C(M(i)))";
  std::vector<Token> Lexed = CheckLex(Source, Expected);

  // C's body starts with 'c', so the fourth token is not the first token of
  // C's expansion; the text is expected to stop at M(i).
  EXPECT_EQ("M(i)", getSourceText(Lexed[3], Lexed[3]));
}
TEST_F(LexerTest, GetSourceTextExpandsRecursively) {
  // Expected preprocessed stream: identifier identifier '(' identifier ')'.
  const tok::TokenKind Expected[] = {
    tok::identifier, tok::identifier, tok::l_paren, tok::identifier,
    tok::r_paren
  };
  const char *Source = "#define M(x) x\n"
                       "#define C(x) c M(x)\n"
                       "C(f(M(i)))";
  std::vector<Token> Lexed = CheckLex(Source, Expected);

  // The fourth token is the identifier inside f(...); its text is expected
  // to be the M(i) written in C's argument.
  EXPECT_EQ("M(i)", getSourceText(Lexed[3], Lexed[3]));
}
TEST_F(LexerTest, LexAPI) {
std::vector<tok::TokenKind> ExpectedTokens;
ExpectedTokens.push_back(tok::l_square);
ExpectedTokens.push_back(tok::identifier);
ExpectedTokens.push_back(tok::r_square);
ExpectedTokens.push_back(tok::l_square);
ExpectedTokens.push_back(tok::identifier);
ExpectedTokens.push_back(tok::r_square);
ExpectedTokens.push_back(tok::identifier);
ExpectedTokens.push_back(tok::identifier);
ExpectedTokens.push_back(tok::identifier);
ExpectedTokens.push_back(tok::identifier);
std::vector<Token> toks = CheckLex("#define M(x) [x]\n"
"#define N(x) x\n"
"#define INN(x) x\n"
"#define NOF1 INN(val)\n"
"#define NOF2 val\n"
"M(foo) N([bar])\n"
"N(INN(val)) N(NOF1) N(NOF2) N(val)",
ExpectedTokens);
SourceLocation lsqrLoc = toks[0].getLocation();
SourceLocation idLoc = toks[1].getLocation();
SourceLocation rsqrLoc = toks[2].getLocation();