From 065d720c314702a9253eb243b38e72cb538c31c7 Mon Sep 17 00:00:00 2001 From: Argyrios Kyrtzidis Date: Thu, 16 May 2013 21:37:39 +0000 Subject: [PATCH] [Lexer] Improve Lexer::getSourceText() when the given range deals with function macro arguments. This is a modified version of a patch by Manuel Klimek. llvm-svn: 182055 --- clang/include/clang/Basic/SourceManager.h | 24 ++ clang/lib/Basic/SourceManager.cpp | 108 ++++++++ clang/lib/Lex/Lexer.cpp | 55 ++-- clang/unittests/Lex/LexerTest.cpp | 304 +++++++++++++++++----- 4 files changed, 392 insertions(+), 99 deletions(-) diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index eccbf1ede7f1..5fd5a47a4a75 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -1161,6 +1161,22 @@ public: /// expansion but not the expansion of an argument to a function-like macro. bool isMacroBodyExpansion(SourceLocation Loc) const; + /// \brief Returns true if the given MacroID location points at the beginning + /// of the immediate macro expansion. + /// + /// \param MacroBegin If non-null and function returns true, it is set to the + /// begin location of the immediate macro expansion. + bool isAtStartOfImmediateMacroExpansion(SourceLocation Loc, + SourceLocation *MacroBegin = 0) const; + + /// \brief Returns true if the given MacroID location points at the character + /// end of the immediate macro expansion. + /// + /// \param MacroEnd If non-null and function returns true, it is set to the + /// character end location of the immediate macro expansion. + bool isAtEndOfImmediateMacroExpansion(SourceLocation Loc, + SourceLocation *MacroEnd = 0) const; + /// \brief Returns true if \p Loc is inside the [\p Start, +\p Length) /// chunk of the source location address space. /// @@ -1570,6 +1586,14 @@ private: return SLocOffset < getSLocEntryByID(FID.ID+1).getOffset(); } + /// \brief Returns the previous in-order FileID or an invalid FileID if there + /// is no previous one. + FileID getPreviousFileID(FileID FID) const; + + /// \brief Returns the next in-order FileID or an invalid FileID if there is + /// no next one. + FileID getNextFileID(FileID FID) const; + /// \brief Create a new fileID for the specified ContentCache and /// include position. /// diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index d6dc6d6328af..6994b3009399 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -536,6 +536,43 @@ SourceManager::getFakeContentCacheForRecovery() const { return FakeContentCacheForRecovery; } +/// \brief Returns the previous in-order FileID or an invalid FileID if there +/// is no previous one. +FileID SourceManager::getPreviousFileID(FileID FID) const { + if (FID.isInvalid()) + return FileID(); + + int ID = FID.ID; + if (ID == -1) + return FileID(); + + if (ID > 0) { + if (ID-1 == 0) + return FileID(); + } else if (unsigned(-(ID-1) - 2) >= LoadedSLocEntryTable.size()) { + return FileID(); + } + + return FileID::get(ID-1); +} + +/// \brief Returns the next in-order FileID or an invalid FileID if there is +/// no next one. +FileID SourceManager::getNextFileID(FileID FID) const { + if (FID.isInvalid()) + return FileID(); + + int ID = FID.ID; + if (ID > 0) { + if (unsigned(ID+1) >= local_sloc_entry_size()) + return FileID(); + } else if (ID+1 >= -1) { + return FileID(); + } + + return FileID::get(ID+1); +} + //===----------------------------------------------------------------------===// // Methods to create new FileID's and macro expansions. //===----------------------------------------------------------------------===// @@ -998,6 +1035,77 @@ bool SourceManager::isMacroBodyExpansion(SourceLocation Loc) const { return Expansion.isMacroBodyExpansion(); } +bool SourceManager::isAtStartOfImmediateMacroExpansion(SourceLocation Loc, + SourceLocation *MacroBegin) const { + assert(Loc.isValid() && Loc.isMacroID() && "Expected a valid macro loc"); + + std::pair DecompLoc = getDecomposedLoc(Loc); + if (DecompLoc.second > 0) + return false; // Does not point at the start of expansion range. + + bool Invalid = false; + const SrcMgr::ExpansionInfo &ExpInfo = + getSLocEntry(DecompLoc.first, &Invalid).getExpansion(); + if (Invalid) + return false; + SourceLocation ExpLoc = ExpInfo.getExpansionLocStart(); + + if (ExpInfo.isMacroArgExpansion()) { + // For macro argument expansions, check if the previous FileID is part of + // the same argument expansion, in which case this Loc is not at the + // beginning of the expansion. + FileID PrevFID = getPreviousFileID(DecompLoc.first); + if (!PrevFID.isInvalid()) { + const SrcMgr::SLocEntry &PrevEntry = getSLocEntry(PrevFID, &Invalid); + if (Invalid) + return false; + if (PrevEntry.isExpansion() && + PrevEntry.getExpansion().getExpansionLocStart() == ExpLoc) + return false; + } + } + + if (MacroBegin) + *MacroBegin = ExpLoc; + return true; +} + +bool SourceManager::isAtEndOfImmediateMacroExpansion(SourceLocation Loc, + SourceLocation *MacroEnd) const { + assert(Loc.isValid() && Loc.isMacroID() && "Expected a valid macro loc"); + + FileID FID = getFileID(Loc); + SourceLocation NextLoc = Loc.getLocWithOffset(1); + if (isInFileID(NextLoc, FID)) + return false; // Does not point at the end of expansion range. + + bool Invalid = false; + const SrcMgr::ExpansionInfo &ExpInfo = + getSLocEntry(FID, &Invalid).getExpansion(); + if (Invalid) + return false; + + if (ExpInfo.isMacroArgExpansion()) { + // For macro argument expansions, check if the next FileID is part of the + // same argument expansion, in which case this Loc is not at the end of the + // expansion. + FileID NextFID = getNextFileID(FID); + if (!NextFID.isInvalid()) { + const SrcMgr::SLocEntry &NextEntry = getSLocEntry(NextFID, &Invalid); + if (Invalid) + return false; + if (NextEntry.isExpansion() && + NextEntry.getExpansion().getExpansionLocStart() == + ExpInfo.getExpansionLocStart()) + return false; + } + } + + if (MacroEnd) + *MacroEnd = ExpInfo.getExpansionLocEnd(); + return true; +} + //===----------------------------------------------------------------------===// // Queries about the code at a SourceLocation. diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 66498b1a2c9d..e58581ee06ba 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -798,14 +798,10 @@ bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc, SourceLocation *MacroBegin) { assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc"); - std::pair infoLoc = SM.getDecomposedLoc(loc); - // FIXME: If the token comes from the macro token paste operator ('##') - // this function will always return false; - if (infoLoc.second > 0) - return false; // Does not point at the start of token. + SourceLocation expansionLoc; + if (!SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc)) + return false; - SourceLocation expansionLoc = - SM.getSLocEntry(infoLoc.first).getExpansion().getExpansionLocStart(); if (expansionLoc.isFileID()) { // No other macro expansions, this is the first. if (MacroBegin) @@ -829,16 +825,11 @@ bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc, if (tokLen == 0) return false; - FileID FID = SM.getFileID(loc); - SourceLocation afterLoc = loc.getLocWithOffset(tokLen+1); - if (SM.isInFileID(afterLoc, FID)) - return false; // Still in the same FileID, does not point to the last token. + SourceLocation afterLoc = loc.getLocWithOffset(tokLen); + SourceLocation expansionLoc; + if (!SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc)) + return false; - // FIXME: If the token comes from the macro token paste operator ('##') - // or the stringify operator ('#') this function will always return false; - - SourceLocation expansionLoc = - SM.getSLocEntry(FID).getExpansion().getExpansionLocEnd(); if (expansionLoc.isFileID()) { // No other macro expansions. if (MacroEnd) @@ -916,25 +907,25 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range, return makeRangeFromFileLocs(Range, SM, LangOpts); } - FileID FID; - unsigned BeginOffs; - llvm::tie(FID, BeginOffs) = SM.getDecomposedLoc(Begin); - if (FID.isInvalid()) + bool Invalid = false; + const SrcMgr::SLocEntry &BeginEntry = SM.getSLocEntry(SM.getFileID(Begin), + &Invalid); + if (Invalid) return CharSourceRange(); - unsigned EndOffs; - if (!SM.isInFileID(End, FID, &EndOffs) || - BeginOffs > EndOffs) - return CharSourceRange(); + if (BeginEntry.getExpansion().isMacroArgExpansion()) { + const SrcMgr::SLocEntry &EndEntry = SM.getSLocEntry(SM.getFileID(End), + &Invalid); + if (Invalid) + return CharSourceRange(); - const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID); - const SrcMgr::ExpansionInfo &Expansion = E->getExpansion(); - if (Expansion.isMacroArgExpansion() && - Expansion.getSpellingLoc().isFileID()) { - SourceLocation SpellLoc = Expansion.getSpellingLoc(); - Range.setBegin(SpellLoc.getLocWithOffset(BeginOffs)); - Range.setEnd(SpellLoc.getLocWithOffset(EndOffs)); - return makeRangeFromFileLocs(Range, SM, LangOpts); + if (EndEntry.getExpansion().isMacroArgExpansion() && + BeginEntry.getExpansion().getExpansionLocStart() == + EndEntry.getExpansion().getExpansionLocStart()) { + Range.setBegin(SM.getImmediateSpellingLoc(Begin)); + Range.setEnd(SM.getImmediateSpellingLoc(End)); + return makeFileCharRange(Range, SM, LangOpts); + } } return CharSourceRange(); diff --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp index c9b1840e1c04..a8e25cb2a394 100644 --- a/clang/unittests/Lex/LexerTest.cpp +++ b/clang/unittests/Lex/LexerTest.cpp @@ -28,30 +28,6 @@ using namespace clang; namespace { -// The test fixture. -class LexerTest : public ::testing::Test { -protected: - LexerTest() - : FileMgr(FileMgrOpts), - DiagID(new DiagnosticIDs()), - Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()), - SourceMgr(Diags, FileMgr), - TargetOpts(new TargetOptions) - { - TargetOpts->Triple = "x86_64-apple-darwin11.1.0"; - Target = TargetInfo::CreateTargetInfo(Diags, &*TargetOpts); - } - - FileSystemOptions FileMgrOpts; - FileManager FileMgr; - IntrusiveRefCntPtr DiagID; - DiagnosticsEngine Diags; - SourceManager SourceMgr; - LangOptions LangOpts; - IntrusiveRefCntPtr TargetOpts; - IntrusiveRefCntPtr Target; -}; - class VoidModuleLoader : public ModuleLoader { virtual ModuleLoadResult loadModule(SourceLocation ImportLoc, ModuleIdPath Path, @@ -66,51 +42,245 @@ class VoidModuleLoader : public ModuleLoader { bool Complain) { } }; -TEST_F(LexerTest, LexAPI) { - const char *source = - "#define M(x) [x]\n" - "#define N(x) x\n" - "#define INN(x) x\n" - "#define NOF1 INN(val)\n" - "#define NOF2 val\n" - "M(foo) N([bar])\n" - "N(INN(val)) N(NOF1) N(NOF2) N(val)"; - - MemoryBuffer *buf = MemoryBuffer::getMemBuffer(source); - (void)SourceMgr.createMainFileIDForMemBuffer(buf); - - VoidModuleLoader ModLoader; - HeaderSearch HeaderInfo(new HeaderSearchOptions, FileMgr, Diags, LangOpts, - Target.getPtr()); - Preprocessor PP(new PreprocessorOptions(), Diags, LangOpts, Target.getPtr(), - SourceMgr, HeaderInfo, ModLoader, - /*IILookup =*/ 0, - /*OwnsHeaderSearch =*/false, - /*DelayInitialization =*/ false); - PP.EnterMainSourceFile(); - - std::vector toks; - while (1) { - Token tok; - PP.Lex(tok); - if (tok.is(tok::eof)) - break; - toks.push_back(tok); +// The test fixture. +class LexerTest : public ::testing::Test { +protected: + LexerTest() + : FileMgr(FileMgrOpts), + DiagID(new DiagnosticIDs()), + Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()), + SourceMgr(Diags, FileMgr), + TargetOpts(new TargetOptions) + { + TargetOpts->Triple = "x86_64-apple-darwin11.1.0"; + Target = TargetInfo::CreateTargetInfo(Diags, &*TargetOpts); } - // Make sure we got the tokens that we expected. - ASSERT_EQ(10U, toks.size()); - ASSERT_EQ(tok::l_square, toks[0].getKind()); - ASSERT_EQ(tok::identifier, toks[1].getKind()); - ASSERT_EQ(tok::r_square, toks[2].getKind()); - ASSERT_EQ(tok::l_square, toks[3].getKind()); - ASSERT_EQ(tok::identifier, toks[4].getKind()); - ASSERT_EQ(tok::r_square, toks[5].getKind()); - ASSERT_EQ(tok::identifier, toks[6].getKind()); - ASSERT_EQ(tok::identifier, toks[7].getKind()); - ASSERT_EQ(tok::identifier, toks[8].getKind()); - ASSERT_EQ(tok::identifier, toks[9].getKind()); - + std::vector CheckLex(StringRef Source, + ArrayRef ExpectedTokens) { + MemoryBuffer *buf = MemoryBuffer::getMemBuffer(Source); + (void) SourceMgr.createMainFileIDForMemBuffer(buf); + + VoidModuleLoader ModLoader; + HeaderSearch HeaderInfo(new HeaderSearchOptions, FileMgr, Diags, LangOpts, + Target.getPtr()); + Preprocessor PP(new PreprocessorOptions(), Diags, LangOpts, Target.getPtr(), + SourceMgr, HeaderInfo, ModLoader, /*IILookup =*/ 0, + /*OwnsHeaderSearch =*/ false, + /*DelayInitialization =*/ false); + PP.EnterMainSourceFile(); + + std::vector toks; + while (1) { + Token tok; + PP.Lex(tok); + if (tok.is(tok::eof)) + break; + toks.push_back(tok); + } + + EXPECT_EQ(ExpectedTokens.size(), toks.size()); + for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) { + EXPECT_EQ(ExpectedTokens[i], toks[i].getKind()); + } + + return toks; + } + + std::string getSourceText(Token Begin, Token End) { + bool Invalid; + StringRef Str = + Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange( + Begin.getLocation(), End.getLocation())), + SourceMgr, LangOpts, &Invalid); + if (Invalid) + return ""; + return Str; + } + + FileSystemOptions FileMgrOpts; + FileManager FileMgr; + IntrusiveRefCntPtr DiagID; + DiagnosticsEngine Diags; + SourceManager SourceMgr; + LangOptions LangOpts; + IntrusiveRefCntPtr TargetOpts; + IntrusiveRefCntPtr Target; +}; + +TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::l_paren); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::r_paren); + + std::vector toks = CheckLex("#define M(x) x\n" + "M(f(M(i)))", + ExpectedTokens); + + EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2])); +} + +TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + + std::vector toks = CheckLex("#define M(x) x\n" + "M(M(i) c)", + ExpectedTokens); + + EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0])); +} + +TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + + std::vector toks = CheckLex("#define M(x) x\n" + "M(c c M(i))", + ExpectedTokens); + + EXPECT_EQ("c M(i)", getSourceText(toks[1], toks[2])); +} + +TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + + std::vector toks = CheckLex("#define M(x) x\n" + "M(M(i) c c)", + ExpectedTokens); + + EXPECT_EQ("M(i) c", getSourceText(toks[0], toks[1])); +} + +TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + + std::vector toks = CheckLex("#define M(x) x\n" + "M(c M(i)) M(M(i) c)", + ExpectedTokens); + + EXPECT_EQ("", getSourceText(toks[1], toks[2])); +} + +TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::l_paren); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::r_paren); + + std::vector toks = CheckLex("#define M(x) x\n" + "#define C(x) M(x##c)\n" + "M(f(C(i)))", + ExpectedTokens); + + EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2])); +} + +TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::l_paren); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::r_paren); + + std::vector toks = CheckLex("#define M(x) x\n" + "f(M(M(i)))", + ExpectedTokens); + EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2])); +} + +TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::l_paren); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::r_paren); + + std::vector toks = CheckLex("#define M(x) x\n" + "M(f(i))", + ExpectedTokens); + EXPECT_EQ("i", getSourceText(toks[2], toks[2])); +} + +TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::l_paren); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::r_paren); + + std::vector toks = CheckLex("#define M(x) x\n" + "#define C(x) x\n" + "f(C(M(i)))", + ExpectedTokens); + EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2])); +} + +TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::l_paren); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::r_paren); + + std::vector toks = CheckLex("#define M(x) x\n" + "#define C(x) c x\n" + "f(C(M(i)))", + ExpectedTokens); + EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3])); +} + +TEST_F(LexerTest, GetSourceTextExpandsRecursively) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::l_paren); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::r_paren); + + std::vector toks = CheckLex("#define M(x) x\n" + "#define C(x) c M(x)\n" + "C(f(M(i)))", + ExpectedTokens); + EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3])); +} + +TEST_F(LexerTest, LexAPI) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::l_square); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::r_square); + ExpectedTokens.push_back(tok::l_square); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::r_square); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + + std::vector toks = CheckLex("#define M(x) [x]\n" + "#define N(x) x\n" + "#define INN(x) x\n" + "#define NOF1 INN(val)\n" + "#define NOF2 val\n" + "M(foo) N([bar])\n" + "N(INN(val)) N(NOF1) N(NOF2) N(val)", + ExpectedTokens); + SourceLocation lsqrLoc = toks[0].getLocation(); SourceLocation idLoc = toks[1].getLocation(); SourceLocation rsqrLoc = toks[2].getLocation();