[clang][dataflow] Add framework for testing analyses.

Adds a general-purpose framework to support testing of dataflow analyses.

Differential Revision: https://reviews.llvm.org/D115341
This commit is contained in:
Yitzhak Mandelbaum 2021-11-16 16:57:26 +00:00
parent 9c244a33e7
commit 5a40df6381
6 changed files with 540 additions and 3 deletions

View File

@ -78,7 +78,8 @@ struct TypeErasedDataflowAnalysisState {
/// Transfers the state of a basic block by evaluating each of its statements in /// Transfers the state of a basic block by evaluating each of its statements in
/// the context of `Analysis` and the states of its predecessors that are /// the context of `Analysis` and the states of its predecessors that are
/// available in `BlockStates`. /// available in `BlockStates`. `HandleTransferredStmt` (if provided) will be
/// applied to each statement in the block, after it is evaluated.
/// ///
/// Requirements: /// Requirements:
/// ///
@ -88,7 +89,10 @@ struct TypeErasedDataflowAnalysisState {
TypeErasedDataflowAnalysisState transferBlock( TypeErasedDataflowAnalysisState transferBlock(
std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates, std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates,
const CFGBlock &Block, const Environment &InitEnv, const CFGBlock &Block, const Environment &InitEnv,
TypeErasedDataflowAnalysis &Analysis); TypeErasedDataflowAnalysis &Analysis,
std::function<void(const CFGStmt &,
const TypeErasedDataflowAnalysisState &)>
HandleTransferredStmt = nullptr);
/// Performs dataflow analysis and returns a mapping from basic block IDs to /// Performs dataflow analysis and returns a mapping from basic block IDs to
/// dataflow analysis states that model the respective basic blocks. Indices /// dataflow analysis states that model the respective basic blocks. Indices

View File

@ -66,7 +66,10 @@ static TypeErasedDataflowAnalysisState computeBlockInputState(
TypeErasedDataflowAnalysisState transferBlock( TypeErasedDataflowAnalysisState transferBlock(
std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates, std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates,
const CFGBlock &Block, const Environment &InitEnv, const CFGBlock &Block, const Environment &InitEnv,
TypeErasedDataflowAnalysis &Analysis) { TypeErasedDataflowAnalysis &Analysis,
std::function<void(const CFGStmt &,
const TypeErasedDataflowAnalysisState &)>
HandleTransferredStmt) {
TypeErasedDataflowAnalysisState State = TypeErasedDataflowAnalysisState State =
computeBlockInputState(BlockStates, Block, InitEnv, Analysis); computeBlockInputState(BlockStates, Block, InitEnv, Analysis);
for (const CFGElement &Element : Block) { for (const CFGElement &Element : Block) {
@ -79,6 +82,8 @@ TypeErasedDataflowAnalysisState transferBlock(
State.Lattice = Analysis.transferTypeErased(Stmt.getValue().getStmt(), State.Lattice = Analysis.transferTypeErased(Stmt.getValue().getStmt(),
State.Lattice, State.Env); State.Lattice, State.Env);
if (HandleTransferredStmt != nullptr)
HandleTransferredStmt(Stmt.getValue(), State);
} }
return State; return State;
} }

View File

@ -3,6 +3,8 @@ set(LLVM_LINK_COMPONENTS
) )
add_clang_unittest(ClangAnalysisFlowSensitiveTests add_clang_unittest(ClangAnalysisFlowSensitiveTests
TestingSupport.cpp
TestingSupportTest.cpp
TypeErasedDataflowAnalysisTest.cpp TypeErasedDataflowAnalysisTest.cpp
) )
@ -14,8 +16,13 @@ clang_target_link_libraries(ClangAnalysisFlowSensitiveTests
clangASTMatchers clangASTMatchers
clangBasic clangBasic
clangFrontend clangFrontend
clangLex
clangSerialization clangSerialization
clangTesting clangTesting
clangTooling clangTooling
) )
target_link_libraries(ClangAnalysisFlowSensitiveTests
PRIVATE
LLVMTestingSupport
)

View File

@ -0,0 +1,170 @@
#include "TestingSupport.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/Stmt.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Analysis/CFG.h"
#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"
#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/Lexer.h"
#include "clang/Serialization/PCHContainerOperations.h"
#include "clang/Tooling/ArgumentsAdjusters.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Support/Error.h"
#include "llvm/Testing/Support/Annotations.h"
#include "gtest/gtest.h"
#include <functional>
#include <memory>
#include <string>
#include <system_error>
#include <utility>
#include <vector>
using namespace clang;
using namespace dataflow;
namespace {
using ast_matchers::MatchFinder;
/// Match callback that passes the `ASTContext` of a matched translation unit
/// to a user-supplied operation.
///
/// The translation unit declaration is looked up under the binding id "tu".
/// The operation is skipped (and a fatal test failure is recorded) when no
/// node is bound to that id or when the diagnostics client has counted any
/// errors.
class FindTranslationUnitCallback : public MatchFinder::MatchCallback {
public:
  /// \param Operation callable invoked with the AST context of the matched
  /// translation unit.
  explicit FindTranslationUnitCallback(
      std::function<void(ASTContext &)> Operation)
      // `Operation` is a by-value sink parameter, so move it into the member
      // rather than copying the type-erased callable a second time.
      : Operation{std::move(Operation)} {}

  void run(const MatchFinder::MatchResult &Result) override {
    const auto *TU = Result.Nodes.getNodeAs<TranslationUnitDecl>("tu");
    // `getNodeAs` yields null when nothing is bound to the id; report that
    // instead of dereferencing a null pointer.
    if (TU == nullptr) {
      FAIL() << "No translation unit was bound to \"tu\"";
    }
    ASTContext &Context = TU->getASTContext();
    if (Context.getDiagnostics().getClient()->getNumErrors() != 0) {
      FAIL() << "Source file has syntax or type errors, they were printed to "
                "the test log";
    }
    Operation(Context);
  }

  /// Operation applied to the AST context of each accepted translation unit.
  std::function<void(ASTContext &)> Operation;
};
} // namespace
/// Returns true if every token between the end of `Stmt` and the file offset
/// `AnnotationBegin` is a semicolon (which includes the case where there are
/// no such tokens at all).
static bool
isAnnotationDirectlyAfterStatement(const Stmt *Stmt, unsigned AnnotationBegin,
                                   const SourceManager &SourceManager,
                                   const LangOptions &LangOptions) {
  for (auto Tok = Lexer::findNextToken(Stmt->getEndLoc(), SourceManager,
                                       LangOptions);
       Tok.hasValue(); Tok = Lexer::findNextToken(Tok->getEndLoc(),
                                                  SourceManager, LangOptions)) {
    // Tokens at or beyond the annotation are irrelevant; stop scanning.
    if (SourceManager.getFileOffset(Tok->getLocation()) >= AnnotationBegin)
      break;
    // Anything but a semicolon in between separates the two.
    if (Tok->isNot(tok::semi))
      return false;
  }
  return true;
}
/// Binds each annotation range of `AnnotatedCode` to a statement in the body
/// of `Func` and returns the resulting statement-to-annotation-text map.
///
/// Statements are visited from the largest end offset to the smallest, and
/// annotations from last to first. An error is returned when
/// `isAnnotationDirectlyAfterStatement` rejects a candidate binding, when a
/// second annotation would bind to the same statement, or when annotations
/// remain unbound after all statements were visited.
llvm::Expected<llvm::DenseMap<const Stmt *, std::string>>
clang::dataflow::testing::buildStatementToAnnotationMapping(
    const FunctionDecl *Func, llvm::Annotations AnnotatedCode) {
  using namespace ast_matchers; // NOLINT: Too many names

  auto &Context = Func->getASTContext();
  auto &SourceManager = Context.getSourceManager();

  // Renders the main-file location at the given offset for error messages.
  auto DescribeMainFileOffset = [&SourceManager](auto FileOffset) {
    return SourceManager.getLocForStartOfFile(SourceManager.getMainFileID())
        .getLocWithOffset(FileOffset)
        .printToString(SourceManager);
  };

  // Statements keyed by the file offset of their end location. This map
  // should stay sorted because the binding algorithm relies on the ordering
  // of statement offsets.
  std::map<unsigned, const Stmt *> StmtsByEndOffset;
  auto StmtMatcher =
      findAll(stmt(unless(anyOf(hasParent(expr()), hasParent(returnStmt()))))
                  .bind("stmt"));
  for (auto &Match : match(StmtMatcher, *Func->getBody(), Context)) {
    const auto *S = Match.getNodeAs<Stmt>("stmt");
    StmtsByEndOffset[SourceManager.getFileOffset(S->getEndLoc())] = S;
  }

  auto Annotations = AnnotatedCode.ranges();
  std::reverse(Annotations.begin(), Annotations.end());
  auto Code = AnnotatedCode.code();

  llvm::DenseMap<const Stmt *, std::string> Result;
  unsigned NextAnnotation = 0;
  for (auto It = StmtsByEndOffset.rbegin(); It != StmtsByEndOffset.rend();
       ++It) {
    const unsigned Offset = It->first;
    const Stmt *BoundStmt = It->second;

    // Nothing to bind here unless a pending annotation begins at or after the
    // end of this statement.
    if (NextAnnotation >= Annotations.size() ||
        Annotations[NextAnnotation].Begin < Offset)
      continue;

    auto Range = Annotations[NextAnnotation];
    if (!isAnnotationDirectlyAfterStatement(BoundStmt, Range.Begin,
                                            SourceManager,
                                            Context.getLangOpts()))
      return llvm::createStringError(
          std::make_error_code(std::errc::invalid_argument),
          "Annotation is not placed after a statement: %s",
          DescribeMainFileOffset(Offset).data());

    Result[BoundStmt] = Code.slice(Range.Begin, Range.End).str();
    ++NextAnnotation;

    // A further annotation that also begins at or after this statement would
    // have to bind to the same statement.
    if (NextAnnotation < Annotations.size() &&
        Annotations[NextAnnotation].Begin >= Offset)
      return llvm::createStringError(
          std::make_error_code(std::errc::invalid_argument),
          "Multiple annotations bound to the statement at the location: %s",
          BoundStmt->getBeginLoc().printToString(SourceManager).data());
  }

  if (NextAnnotation < Annotations.size())
    return llvm::createStringError(
        std::make_error_code(std::errc::invalid_argument),
        "Not all annotations were bound to statements. Unbound annotation at: "
        "%s",
        DescribeMainFileOffset(Annotations[NextAnnotation].Begin).data());

  return Result;
}
/// Selects the first function definition in `Context` that satisfies
/// `FuncMatcher` and builds a CFG from its body.
///
/// Returns a pair of null pointers when no function matches; otherwise returns
/// the function together with the result of `CFG::buildCFG`.
std::pair<const FunctionDecl *, std::unique_ptr<CFG>>
clang::dataflow::testing::buildCFG(
    ASTContext &Context,
    ast_matchers::internal::Matcher<FunctionDecl> FuncMatcher) {
  namespace am = ast_matchers;

  const auto TargetMatcher =
      am::functionDecl(am::isDefinition(), FuncMatcher).bind("target");
  const FunctionDecl *Target = am::selectFirst<FunctionDecl>(
      "target", am::match(TargetMatcher, Context));
  if (Target == nullptr)
    return std::make_pair(nullptr, nullptr);

  CFG::BuildOptions BuildOpts;
  BuildOpts.PruneTriviallyFalseEdges = false;
  BuildOpts.AddInitializers = true;
  BuildOpts.AddImplicitDtors = true;
  BuildOpts.AddTemporaryDtors = true;
  BuildOpts.setAllAlwaysAdd();

  return std::make_pair(Target,
                        clang::CFG::buildCFG(Target, Target->getBody(),
                                             &Context, BuildOpts));
}

View File

@ -0,0 +1,172 @@
//===--- TestingSupport.h - Testing utils for dataflow analyses -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines utilities to simplify testing of dataflow analyses.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_ANALYSIS_FLOW_SENSITIVE_TESTING_SUPPORT_H_
#define LLVM_CLANG_ANALYSIS_FLOW_SENSITIVE_TESTING_SUPPORT_H_
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/Stmt.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/ASTMatchers/ASTMatchersInternal.h"
#include "clang/Analysis/CFG.h"
#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"
#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
#include "clang/Basic/LLVM.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Testing/Support/Annotations.h"
#include "gtest/gtest.h"
#include <functional>
#include <memory>
#include <string>
#include <utility>
namespace clang {
namespace dataflow {
// Streams a short textual form of `S` to `OS`: the lattice element followed by
// a fixed placeholder for the environment. Returns `OS`.
//
// Requires a `<<` operator for the `Lattice` type.
// FIXME: move to a non-test utility library.
template <typename Lattice>
std::ostream &operator<<(std::ostream &OS,
                         const DataflowAnalysisState<Lattice> &S) {
  OS << "{lattice=" << S.Lattice;
  // FIXME: add printing support for the environment.
  OS << ", environment=...}";
  return OS;
}
namespace testing {
// Returns assertions based on annotations that are present after statements in
// `AnnotatedCode`.
llvm::Expected<llvm::DenseMap<const Stmt *, std::string>>
buildStatementToAnnotationMapping(const FunctionDecl *Func,
llvm::Annotations AnnotatedCode);
// Creates a CFG from the body of the function that matches `FuncMatcher`,
// suitable for testing a dataflow analysis.
std::pair<const FunctionDecl *, std::unique_ptr<CFG>>
buildCFG(ASTContext &Context,
ast_matchers::internal::Matcher<FunctionDecl> FuncMatcher);
// Runs dataflow on the body of the function that matches `FuncMatcher` in code
// snippet `Code`, then passes the collected (annotation, state) pairs to
// `Expectations`.
//
// `Code` may contain annotations (see `llvm::Annotations`); each one is bound
// to the statement it follows, and the reported state is the one computed
// immediately after that statement. The snippet is parsed with the compiler
// arguments `Args`, with `VirtualMappedFiles` available as additional
// in-memory files. `MakeAnalysis` constructs the analysis from the AST context
// and the initial environment.
//
// Any failure (no AST, parse errors, no matching function, no CFG, bad
// annotations, unexpected lattice type) is reported as a fatal gtest failure.
//
// Requires: `AnalysisT` contains a type `Lattice`.
template <typename AnalysisT>
void checkDataflow(
    llvm::StringRef Code,
    ast_matchers::internal::Matcher<FunctionDecl> FuncMatcher,
    std::function<AnalysisT(ASTContext &, Environment &)> MakeAnalysis,
    std::function<void(
        llvm::ArrayRef<std::pair<
            std::string, DataflowAnalysisState<typename AnalysisT::Lattice>>>,
        ASTContext &)>
        Expectations,
    ArrayRef<std::string> Args,
    const tooling::FileContentMappings &VirtualMappedFiles = {}) {
  using StateT = DataflowAnalysisState<typename AnalysisT::Lattice>;

  llvm::Annotations AnnotatedCode(Code);
  // Honor the caller-provided arguments and virtual files rather than a fixed
  // set of flags. The file name, tool name, PCH operations and arguments
  // adjuster are spelled out only to reach the `VirtualMappedFiles` parameter;
  // they are intended to mirror the defaults of `buildASTFromCodeWithArgs`.
  auto Unit = tooling::buildASTFromCodeWithArgs(
      AnnotatedCode.code(), Args.vec(), "input.cc", "clang-tool",
      std::make_shared<PCHContainerOperations>(),
      tooling::getClangStripDependencyFileAdjuster(), VirtualMappedFiles);
  ASSERT_TRUE(Unit != nullptr) << "Could not build AST from the code snippet.";

  auto &Context = Unit->getASTContext();
  if (Context.getDiagnostics().getClient()->getNumErrors() != 0) {
    FAIL() << "Source file has syntax or type errors, they were printed to "
              "the test log";
  }

  std::pair<const FunctionDecl *, std::unique_ptr<CFG>> CFGResult =
      buildCFG(Context, FuncMatcher);
  const auto *F = CFGResult.first;
  auto Cfg = std::move(CFGResult.second);
  ASSERT_TRUE(F != nullptr) << "Could not find target function";
  ASSERT_TRUE(Cfg != nullptr) << "Could not build control flow graph.";

  Environment Env;
  auto Analysis = MakeAnalysis(Context, Env);

  llvm::Expected<llvm::DenseMap<const clang::Stmt *, std::string>>
      StmtToAnnotations = buildStatementToAnnotationMapping(F, AnnotatedCode);
  if (auto E = StmtToAnnotations.takeError()) {
    // `FAIL()` records the failure and returns from this function.
    FAIL() << "Failed to build annotation map: "
           << llvm::toString(std::move(E));
  }
  auto &Annotations = *StmtToAnnotations;

  std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates =
      runTypeErasedDataflowAnalysis(*Cfg, Analysis, Env);

  if (BlockStates.empty()) {
    Expectations({}, Context);
    return;
  }

  // Compute a map from statement annotations to the state computed for
  // the program point immediately after the annotated statement.
  std::vector<std::pair<std::string, StateT>> Results;
  for (const CFGBlock *Block : *Cfg) {
    // Skip blocks that were not evaluated.
    if (!BlockStates[Block->getBlockID()].hasValue())
      continue;

    // Re-run the transfer function over the block so that the state after
    // each individual statement can be observed through the callback.
    transferBlock(
        BlockStates, *Block, Env, Analysis,
        [&Results, &Annotations](const clang::CFGStmt &Stmt,
                                 const TypeErasedDataflowAnalysisState &State) {
          auto It = Annotations.find(Stmt.getStmt());
          if (It == Annotations.end())
            return;
          if (auto *Lattice = llvm::any_cast<typename AnalysisT::Lattice>(
                  &State.Lattice.Value)) {
            // `State` is const, so the lattice and environment are copied.
            Results.emplace_back(It->second, StateT{*Lattice, State.Env});
          } else {
            FAIL() << "Could not cast lattice element to expected type.";
          }
        });
  }
  Expectations(Results, Context);
}
// Convenience overload: runs dataflow on the body of the function named
// `TargetFun` in code snippet `Code` by forwarding to the matcher-based
// `checkDataflow` with a `hasName(TargetFun)` matcher. All other arguments are
// passed through unchanged.
template <typename AnalysisT>
void checkDataflow(
    llvm::StringRef Code, llvm::StringRef TargetFun,
    std::function<AnalysisT(ASTContext &, Environment &)> MakeAnalysis,
    std::function<void(
        llvm::ArrayRef<std::pair<
            std::string, DataflowAnalysisState<typename AnalysisT::Lattice>>>,
        ASTContext &)>
        Expectations,
    ArrayRef<std::string> Args,
    const tooling::FileContentMappings &VirtualMappedFiles = {}) {
  ast_matchers::internal::Matcher<FunctionDecl> NamedTarget =
      ast_matchers::hasName(TargetFun);
  checkDataflow(Code, std::move(NamedTarget), std::move(MakeAnalysis),
                std::move(Expectations), Args, VirtualMappedFiles);
}
} // namespace testing
} // namespace dataflow
} // namespace clang
#endif // LLVM_CLANG_ANALYSIS_FLOW_SENSITIVE_TESTING_SUPPORT_H_

View File

@ -0,0 +1,179 @@
#include "TestingSupport.h"
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Tooling/Tooling.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
using namespace clang;
using namespace dataflow;
namespace {
using ::clang::ast_matchers::functionDecl;
using ::clang::ast_matchers::hasName;
using ::clang::ast_matchers::isDefinition;
using ::testing::_;
using ::testing::IsEmpty;
using ::testing::Pair;
using ::testing::UnorderedElementsAre;
/// Lattice with no distinguishable content: all elements compare equal and
/// joining never reports a change.
class NoopLattice {
public:
  /// Every `NoopLattice` is equal to every other one.
  bool operator==(const NoopLattice &) const { return true; }

  /// Joining is a no-op and is always reported as `Unchanged`.
  LatticeJoinEffect join(const NoopLattice &) {
    return LatticeJoinEffect::Unchanged;
  }
};
/// Prints the fixed text "noop" for any `NoopLattice` and returns `OS`.
std::ostream &operator<<(std::ostream &OS, const NoopLattice &S) {
  return OS << "noop";
}
/// Dataflow analysis over `NoopLattice` whose transfer function ignores its
/// inputs and always yields a fresh `NoopLattice`.
class NoopAnalysis : public DataflowAnalysis<NoopAnalysis, NoopLattice> {
public:
  NoopAnalysis(ASTContext &Context) : DataflowAnalysis(Context) {}

  /// Initial lattice element of the analysis.
  static NoopLattice initialElement() { return NoopLattice(); }

  /// Transfer function; the statement, input element and environment are all
  /// unused.
  NoopLattice transfer(const Stmt *S, const NoopLattice &E, Environment &Env) {
    return NoopLattice();
  }
};
/// Returns the first non-templated function definition in `Context` that
/// satisfies `FunctionMatcher`, or null if there is none.
template <typename T>
const FunctionDecl *findTargetFunc(ASTContext &Context, T FunctionMatcher) {
  const auto Matches = ast_matchers::match(
      functionDecl(FunctionMatcher, isDefinition()).bind("target"), Context);
  for (const auto &Node : Matches) {
    const auto *Candidate = Node.template getNodeAs<FunctionDecl>("target");
    if (Candidate != nullptr && !Candidate->isTemplated())
      return Candidate;
  }
  return nullptr;
}
/// Fixture for tests of `buildStatementToAnnotationMapping`.
class BuildStatementToAnnotationMappingTest : public ::testing::Test {
public:
  /// Parses the annotated snippet `Code`, locates the function definition
  /// named `TargetName`, builds the statement-to-annotation mapping for it and
  /// hands the mapping to `RunChecks`.
  ///
  /// Records a fatal test failure (and does not call `RunChecks`) if the AST
  /// cannot be built, the function is not found, or building the mapping
  /// returns an error.
  void
  runTest(llvm::StringRef Code, llvm::StringRef TargetName,
          std::function<void(const llvm::DenseMap<const Stmt *, std::string> &)>
              RunChecks) {
    llvm::Annotations AnnotatedCode(Code);
    auto Unit = tooling::buildASTFromCodeWithArgs(
        AnnotatedCode.code(), {"-fsyntax-only", "-std=c++17"});
    ASSERT_TRUE(Unit != nullptr) << "Could not build AST.";
    auto &Context = Unit->getASTContext();
    const FunctionDecl *Func = findTargetFunc(Context, hasName(TargetName));
    ASSERT_NE(Func, nullptr);

    llvm::Expected<llvm::DenseMap<const Stmt *, std::string>> Mapping =
        clang::dataflow::testing::buildStatementToAnnotationMapping(
            Func, AnnotatedCode);
    // An `llvm::Expected` holding an error must have that error consumed
    // before it is destroyed; take it here so that the failure path reports
    // the message instead of tripping the unchecked-error check.
    if (!Mapping) {
      FAIL() << "Failed to build annotation map: "
             << llvm::toString(Mapping.takeError());
    }

    RunChecks(*Mapping);
  }
};
// An annotation placed after a `return` statement is bound to that statement.
TEST_F(BuildStatementToAnnotationMappingTest, ReturnStmt) {
  using MappingT = llvm::DenseMap<const Stmt *, std::string>;
  runTest(R"(
int target() {
return 42;
/*[[ok]]*/
}
)",
          "target", [](const MappingT &Annotations) {
            ASSERT_EQ(Annotations.size(), 1u);
            const auto &Entry = *Annotations.begin();
            EXPECT_TRUE(isa<ReturnStmt>(Entry.first));
            EXPECT_EQ(Entry.second, "ok");
          });
}
/// Runs `NoopAnalysis` on the function named `Target` in `Code` with the
/// arguments "-fsyntax-only" and "-std=c++17", and forwards the results to
/// `Expectations`.
void checkDataflow(
    llvm::StringRef Code, llvm::StringRef Target,
    std::function<void(llvm::ArrayRef<std::pair<
                           std::string, DataflowAnalysisState<NoopLattice>>>,
                       ASTContext &)>
        Expectations) {
  auto MakeNoopAnalysis = [](ASTContext &Context, Environment &) {
    return NoopAnalysis(Context);
  };
  clang::dataflow::testing::checkDataflow<NoopAnalysis>(
      Code, Target, MakeNoopAnalysis, std::move(Expectations),
      {"-fsyntax-only", "-std=c++17"});
}
// Code without annotations yields an empty result list, reported exactly once.
TEST(ProgramPointAnnotations, NoAnnotations) {
  using ResultsT = llvm::ArrayRef<
      std::pair<std::string, DataflowAnalysisState<NoopLattice>>>;
  ::testing::MockFunction<void(ResultsT, ASTContext &)> Expectations;

  EXPECT_CALL(Expectations, Call(IsEmpty(), _)).Times(1);

  checkDataflow("void target() {}", "target", Expectations.AsStdFunction());
}
// Same as `NoAnnotations`, but the analyzed function is not named "target".
TEST(ProgramPointAnnotations, NoAnnotationsDifferentTarget) {
  using ResultsT = llvm::ArrayRef<
      std::pair<std::string, DataflowAnalysisState<NoopLattice>>>;
  ::testing::MockFunction<void(ResultsT, ASTContext &)> Expectations;

  EXPECT_CALL(Expectations, Call(IsEmpty(), _)).Times(1);

  checkDataflow("void fun() {}", "fun", Expectations.AsStdFunction());
}
// A single annotation produces a single result labelled with its text.
TEST(ProgramPointAnnotations, WithCodepoint) {
  using ResultsT = llvm::ArrayRef<
      std::pair<std::string, DataflowAnalysisState<NoopLattice>>>;
  ::testing::MockFunction<void(ResultsT, ASTContext &)> Expectations;

  EXPECT_CALL(Expectations,
              Call(UnorderedElementsAre(Pair("program-point", _)), _))
      .Times(1);

  checkDataflow(R"cc(void target() {
int n;
// [[program-point]]
})cc",
                "target", Expectations.AsStdFunction());
}
// Annotations in different branches each produce their own result; the order
// of the results is not checked.
TEST(ProgramPointAnnotations, MultipleCodepoints) {
  using ResultsT = llvm::ArrayRef<
      std::pair<std::string, DataflowAnalysisState<NoopLattice>>>;
  ::testing::MockFunction<void(ResultsT, ASTContext &)> Expectations;

  EXPECT_CALL(Expectations,
              Call(UnorderedElementsAre(Pair("program-point-1", _),
                                        Pair("program-point-2", _)),
                   _))
      .Times(1);

  checkDataflow(R"cc(void target(bool b) {
if (b) {
int n;
// [[program-point-1]]
} else {
int m;
// [[program-point-2]]
}
})cc",
                "target", Expectations.AsStdFunction());
}
} // namespace