llvm-project/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp

220 lines
7.6 KiB
C++

//===--- LexerUtils.cpp - clang-tidy---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "LexerUtils.h"
#include "clang/AST/AST.h"
#include "clang/Basic/SourceManager.h"
namespace clang {
namespace tidy {
namespace utils {
namespace lexer {
// Scans backwards from the character preceding `Location` and returns the
// first raw token that lexes successfully. When `SkipComments` is true,
// comment tokens are passed over as well.
//
// The returned token has kind tok::unknown when the location one character
// before `Location` is invalid. If the scan reaches the start of the file
// without accepting a token, the token is returned in whatever state the last
// lexing attempt left it.
Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
                       const LangOptions &LangOpts, bool SkipComments) {
  Token Result;
  Result.setKind(tok::unknown);

  Location = Location.getLocWithOffset(-1);
  if (Location.isInvalid())
    return Result;

  const SourceLocation FileStart =
      SM.getLocForStartOfFile(SM.getFileID(Location));

  // Each iteration snaps to the beginning of the token under the cursor; the
  // loop increment then steps one character before it to reach the previous
  // token on the next round.
  for (; Location != FileStart; Location = Location.getLocWithOffset(-1)) {
    Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts);

    // getRawToken reports failure by returning true.
    const bool LexFailed = Lexer::getRawToken(Location, Result, SM, LangOpts);
    if (LexFailed)
      continue;
    if (SkipComments && Result.is(tok::comment))
      continue;
    return Result;
  }
  return Result;
}
// Returns the beginning of the token that covers the character immediately
// before `Start`. An invalid SourceLocation is returned when either `Start`
// or the location one character before it is invalid or is a macro location.
SourceLocation findPreviousTokenStart(SourceLocation Start,
                                      const SourceManager &SM,
                                      const LangOptions &LangOpts) {
  const auto IsUnusable = [](SourceLocation Loc) {
    return Loc.isInvalid() || Loc.isMacroID();
  };

  if (IsUnusable(Start))
    return SourceLocation();

  const SourceLocation OneBefore = Start.getLocWithOffset(-1);
  if (IsUnusable(OneBefore))
    return SourceLocation();

  return Lexer::GetBeginningOfToken(OneBefore, SM, LangOpts);
}
// Walks backwards token by token from `Start` until a token of kind `TK` is
// found and returns that token's location.
//
// An invalid SourceLocation is returned when `Start` is invalid or a macro
// location, when the walk hits an invalid or macro location, or when
// Lexer::getRawToken reports a failure.
SourceLocation findPreviousTokenKind(SourceLocation Start,
                                     const SourceManager &SM,
                                     const LangOptions &LangOpts,
                                     tok::TokenKind TK) {
  if (Start.isInvalid() || Start.isMacroID())
    return SourceLocation();

  for (SourceLocation Cursor = Start;;) {
    const SourceLocation TokBegin =
        findPreviousTokenStart(Cursor, SM, LangOpts);
    if (TokBegin.isInvalid() || TokBegin.isMacroID())
      return SourceLocation();

    Token Candidate;
    const bool LexFailed = Lexer::getRawToken(TokBegin, Candidate, SM, LangOpts,
                                              /*IgnoreWhiteSpace=*/true);
    if (LexFailed)
      return SourceLocation();

    if (Candidate.is(TK))
      return Candidate.getLocation();

    // Not the kind we want: continue the search before this token.
    Cursor = TokBegin;
  }
}
// Searches forward from `Start` for a terminator token, i.e. either a comma
// (tok::comma) or a semicolon (tok::semi).
//
// This is a thin wrapper: the search itself, and the value returned when no
// such token exists, are defined by findNextAnyTokenKind (declared outside
// this file -- presumably in LexerUtils.h; confirm there).
SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
const LangOptions &LangOpts) {
return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
}
// Returns the first non-comment token that follows `Start`, or an empty
// Optional when Lexer::findNextToken cannot produce a token.
//
// Whenever the token found is a comment, the search is resumed from that
// comment's own location so that the next lookup yields the token following
// the comment. (Re-querying with an unchanged start location would return the
// very same comment again and never terminate.)
Optional<Token> findNextTokenSkippingComments(SourceLocation Start,
                                              const SourceManager &SM,
                                              const LangOptions &LangOpts) {
  Optional<Token> CurrentToken = Lexer::findNextToken(Start, SM, LangOpts);
  while (CurrentToken && CurrentToken->is(tok::comment)) {
    // Advance past the comment: findNextToken looks for the token that comes
    // after the one at the given location.
    CurrentToken =
        Lexer::findNextToken(CurrentToken->getLocation(), SM, LangOpts);
  }
  return CurrentToken;
}
// Re-lexes `Range` and reports whether it is unsafe to treat as plain source
// text. Returns true as soon as one of the following is seen while stepping
// through the range:
//   * the current location is a macro location,
//   * Lexer::findNextToken yields no token for the current location,
//   * the token it yields is a '#' (tok::hash).
// Returns false when the whole range was stepped through without any of
// these. `Range` must be valid (asserted).
bool rangeContainsExpansionsOrDirectives(SourceRange Range,
                                         const SourceManager &SM,
                                         const LangOptions &LangOpts) {
  assert(Range.isValid() && "Invalid Range for relexing provided");

  // The cursor is advanced to one character past the end of the token it
  // currently points at.
  for (SourceLocation Cursor = Range.getBegin(); Cursor < Range.getEnd();
       Cursor = Lexer::getLocForEndOfToken(Cursor, 0, SM, LangOpts)
                    .getLocWithOffset(1)) {
    if (Cursor.isMacroID())
      return true;

    const llvm::Optional<Token> Following =
        Lexer::findNextToken(Cursor, SM, LangOpts);
    if (!Following || Following->is(tok::hash))
      return true;
  }
  return false;
}
// Raw-lexes the source text covered by `Range` and returns the last token of
// kind `TK` that qualifies as a match, or an empty Optional if there is none.
//
// `TK` must be one of tok::kw_const, tok::kw_volatile or tok::kw_restrict
// (asserted). Matches are tracked in two slots:
//   * matches seen before any '<' token go into LastMatchBeforeTemplate;
//   * matches seen after a '<' token go into LastMatchAfterTemplate, and that
//     slot is cleared again whenever a '>' or '>>' token is lexed.
// The "after template" match is preferred when it is set; otherwise the
// "before template" match is returned.
llvm::Optional<Token> getQualifyingToken(tok::TokenKind TK,
CharSourceRange Range,
const ASTContext &Context,
const SourceManager &SM) {
assert((TK == tok::kw_const || TK == tok::kw_volatile ||
TK == tok::kw_restrict) &&
"TK is not a qualifier keyword");
// Set up a raw lexer over the whole file buffer, positioned at the offset of
// Range's begin location within that file.
std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getBegin());
StringRef File = SM.getBufferData(LocInfo.first);
Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(),
File.begin(), File.data() + LocInfo.second, File.end());
llvm::Optional<Token> LastMatchBeforeTemplate;
llvm::Optional<Token> LastMatchAfterTemplate;
// Becomes true at the first '<' token and is never reset afterwards.
bool SawTemplate = false;
Token Tok;
// Keep lexing until LexFromRawLexer returns true (presumably end of buffer --
// confirm against the Lexer documentation), or until a token located at or
// after Range's end is reached.
while (!RawLexer.LexFromRawLexer(Tok) &&
Range.getEnd() != Tok.getLocation() &&
!SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) {
// The raw lexer yields raw_identifier tokens; look the spelling up in the
// identifier table and adopt its token ID so that the Tok.is(TK) check
// below can match keyword kinds.
if (Tok.is(tok::raw_identifier)) {
IdentifierInfo &Info = Context.Idents.get(
StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
Tok.setIdentifierInfo(&Info);
Tok.setKind(Info.getTokenID());
}
if (Tok.is(tok::less))
SawTemplate = true;
else if (Tok.isOneOf(tok::greater, tok::greatergreater))
// A closing angle bracket discards any match recorded after a '<'.
LastMatchAfterTemplate = None;
else if (Tok.is(TK)) {
if (SawTemplate)
LastMatchAfterTemplate = Tok;
else
LastMatchBeforeTemplate = Tok;
}
}
return LastMatchAfterTemplate != None ? LastMatchAfterTemplate
: LastMatchBeforeTemplate;
}
// True for the statement kinds at which getUnifiedEndLoc stops descending and
// uses the statement's own end location unchanged: compound statements,
// declaration statements and null statements.
static bool breakAndReturnEnd(const Stmt &S) {
  return isa<CompoundStmt>(S) || isa<DeclStmt>(S) || isa<NullStmt>(S);
}
// True for the statement kinds at which getUnifiedEndLoc stops descending and
// looks for the semicolon token following the end location: expressions and
// do / return / break / continue / goto / SEH __leave statements.
static bool breakAndReturnEndPlus1Token(const Stmt &S) {
  return isa<Expr>(S) || isa<DoStmt>(S) || isa<ReturnStmt>(S) ||
         isa<BreakStmt>(S) || isa<ContinueStmt>(S) || isa<GotoStmt>(S) ||
         isa<SEHLeaveStmt>(S);
}
// Given the end location of a Stmt that does not include its trailing
// semicolon, returns the SourceLocation of that semicolon. Returns an invalid
// SourceLocation when the next non-comment token is not a semicolon.
static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
                                                  const SourceManager &SM,
                                                  const LangOptions &LangOpts) {
  // Yields the next non-comment token after Loc, but only if it is a ';'.
  const auto SemicolonAfter = [&SM,
                               &LangOpts](SourceLocation Loc) -> Optional<Token> {
    Optional<Token> Following =
        findNextTokenSkippingComments(Loc, SM, LangOpts);
    if (Following && !Following->is(tok::semi))
      Following.reset();
    return Following;
  };

  if (EndLoc.isMacroID()) {
    // Suppose EndLoc points to a function call `foo` inside macro F and the
    // semicolon is part of the macro arguments:
    //   F ( foo() ; )
    // Then the semicolon directly follows the spelling location of EndLoc.
    // Requiring the token to be a semicolon sidesteps all other macro issues.
    // Ideally this would return a location inside `F` whose spelling location
    // is the `;`; for now the location of the found token is returned as is.
    if (Optional<Token> Semi = SemicolonAfter(SM.getSpellingLoc(EndLoc)))
      return Semi->getLocation();
    // Otherwise fall through to the normal handling, e.g. for
    //   F ( foo() ) ;
    // where the semicolon follows EndLoc itself rather than its spelling
    // location.
  }

  // Checking for a semicolon here as well avoids some issues with macros.
  if (Optional<Token> Semi = SemicolonAfter(EndLoc))
    return Semi->getLocation();
  return SourceLocation();
}
// Computes an end location for `S` that includes the trailing semicolon where
// applicable.
//
// Starting at `S`, the last child is followed repeatedly until a statement
// without children, or one matched by breakAndReturnEnd /
// breakAndReturnEndPlus1Token, is reached. If that statement is matched only
// by breakAndReturnEndPlus1Token, the location of the semicolon following
// S.getEndLoc() is returned (invalid if there is none); otherwise
// S.getEndLoc() is returned unchanged.
SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
                                const LangOptions &LangOpts) {
  const auto StopsDescent = [](const Stmt &Node) {
    return breakAndReturnEnd(Node) || breakAndReturnEndPlus1Token(Node);
  };

  const Stmt *Current = &S;
  while (!Current->children().empty() && !StopsDescent(*Current)) {
    // Walk the child range to its final element.
    const Stmt *Last = Current;
    for (const Stmt *Child : Current->children())
      Last = Child;
    Current = Last;
  }

  const bool NeedsSemicolon =
      !breakAndReturnEnd(*Current) && breakAndReturnEndPlus1Token(*Current);
  if (NeedsSemicolon)
    return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);
  return S.getEndLoc();
}
} // namespace lexer
} // namespace utils
} // namespace tidy
} // namespace clang