[clang][dataflow] Implement a basic algorithm for dataflow analysis

This is part of the implementation of the dataflow analysis framework.
See "[RFC] A dataflow analysis framework for Clang AST" on cfe-dev.

Reviewed By: xazax.hun, gribozavr2

Differential Revision: https://reviews.llvm.org/D115235
This commit is contained in:
Stanislav Gatev 2021-12-10 10:37:07 +01:00 committed by Dmitri Gribenko
parent 350fe22f2a
commit 8dcaf3aa0b
7 changed files with 318 additions and 10 deletions

View File

@ -15,11 +15,20 @@
#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWENVIRONMENT_H
#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWENVIRONMENT_H
#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
namespace clang {
namespace dataflow {
/// Holds the state of the program (store and heap) at a given program point.
class Environment {};
class Environment {
public:
bool operator==(const Environment &) const { return true; }
LatticeJoinEffect join(const Environment &) {
return LatticeJoinEffect::Unchanged;
}
};
} // namespace dataflow
} // namespace clang

View File

@ -61,11 +61,12 @@ struct ReversePostOrderCompare {
/// the same block multiple times at once.
struct ForwardDataflowWorklist
: DataflowWorklistBase<ReversePostOrderCompare, 20> {
ForwardDataflowWorklist(const CFG &Cfg, PostOrderCFGView *POV)
: DataflowWorklistBase(Cfg, POV,
ReversePostOrderCompare{POV->getComparator()}) {}
ForwardDataflowWorklist(const CFG &Cfg, AnalysisDeclContext &Ctx)
: DataflowWorklistBase(
Cfg, Ctx.getAnalysis<PostOrderCFGView>(),
ReversePostOrderCompare{
Ctx.getAnalysis<PostOrderCFGView>()->getComparator()}) {}
: ForwardDataflowWorklist(Cfg, Ctx.getAnalysis<PostOrderCFGView>()) {}
void enqueueSuccessors(const CFGBlock *Block) {
for (auto B : Block->succs())

View File

@ -76,6 +76,20 @@ struct TypeErasedDataflowAnalysisState {
Environment Env;
};
/// Transfers the state of a basic block by evaluating each of its statements in
/// the context of `Analysis` and the states of its predecessors that are
/// available in `BlockStates`.
///
/// Requirements:
///
/// All predecessors of `Block` except those with loop back edges must have
/// already been transferred. States in `BlockStates` that are set to
/// `llvm::None` represent basic blocks that are not evaluated yet.
TypeErasedDataflowAnalysisState transferBlock(
std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates,
const CFGBlock &Block, const Environment &InitEnv,
TypeErasedDataflowAnalysis &Analysis);
/// Performs dataflow analysis and returns a mapping from basic block IDs to
/// dataflow analysis states that model the respective basic blocks. Indices
/// of the returned vector correspond to basic block IDs.

View File

@ -11,15 +11,77 @@
//
//===----------------------------------------------------------------------===//
#include <utility>
#include <vector>
#include "clang/Analysis/Analyses/PostOrderCFGView.h"
#include "clang/Analysis/CFG.h"
#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;
using namespace dataflow;
namespace clang {
namespace dataflow {
/// Computes the input state for a given basic block by joining the output
/// states of its predecessors.
///
/// Requirements:
///
/// All predecessors of `Block` except those with loop back edges must have
/// already been transferred. States in `BlockStates` that are set to
/// `llvm::None` represent basic blocks that are not evaluated yet.
static TypeErasedDataflowAnalysisState computeBlockInputState(
std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates,
const CFGBlock &Block, const Environment &InitEnv,
TypeErasedDataflowAnalysis &Analysis) {
// FIXME: Consider passing `Block` to `Analysis.typeErasedInitialElement()`
// to enable building analyses like computation of dominators that initialize
// the state of each basic block differently.
TypeErasedDataflowAnalysisState State = {Analysis.typeErasedInitialElement(),
InitEnv};
for (const CFGBlock *Pred : Block.preds()) {
// Skip if the `Block` is unreachable or control flow cannot get past it.
if (!Pred || Pred->hasNoReturnElement())
continue;
// Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a
// loop back edge to `Block`.
const llvm::Optional<TypeErasedDataflowAnalysisState> &MaybePredState =
BlockStates[Pred->getBlockID()];
if (!MaybePredState.hasValue())
continue;
const TypeErasedDataflowAnalysisState &PredState =
MaybePredState.getValue();
Analysis.joinTypeErased(State.Lattice, PredState.Lattice);
State.Env.join(PredState.Env);
}
return State;
}
TypeErasedDataflowAnalysisState transferBlock(
std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates,
const CFGBlock &Block, const Environment &InitEnv,
TypeErasedDataflowAnalysis &Analysis) {
TypeErasedDataflowAnalysisState State =
computeBlockInputState(BlockStates, Block, InitEnv, Analysis);
for (const CFGElement &Element : Block) {
// FIXME: Evaluate other kinds of `CFGElement`.
const llvm::Optional<CFGStmt> Stmt = Element.getAs<CFGStmt>();
if (!Stmt.hasValue())
continue;
// FIXME: Evaluate the statement contained in `Stmt`.
State.Lattice = Analysis.transferTypeErased(Stmt.getValue().getStmt(),
State.Lattice, State.Env);
}
return State;
}
std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>>
runTypeErasedDataflowAnalysis(const CFG &Cfg,
@ -29,7 +91,59 @@ runTypeErasedDataflowAnalysis(const CFG &Cfg,
// are specified in the header. This could be done by remembering
// what options were used to build `Cfg` and asserting on them here.
// FIXME: Implement work list-based algorithm to compute the fixed
// point of `Analysis::transform` for every basic block in `Cfg`.
return {};
PostOrderCFGView POV(&Cfg);
ForwardDataflowWorklist Worklist(Cfg, &POV);
std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates;
BlockStates.resize(Cfg.size(), llvm::None);
// The entry basic block doesn't contain statements so it can be skipped.
const CFGBlock &Entry = Cfg.getEntry();
BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(),
InitEnv};
Worklist.enqueueSuccessors(&Entry);
// Bugs in lattices and transfer functions can prevent the analysis from
// converging. To limit the damage (infinite loops) that these bugs can cause,
// limit the number of iterations.
// FIXME: Consider making the maximum number of iterations configurable.
// FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number
// of iterations, number of functions that time out, etc.
unsigned Iterations = 0;
static constexpr unsigned MaxIterations = 1 << 16;
while (const CFGBlock *Block = Worklist.dequeue()) {
if (++Iterations > MaxIterations) {
llvm::errs() << "Maximum number of iterations reached, giving up.\n";
break;
}
const llvm::Optional<TypeErasedDataflowAnalysisState> &OldBlockState =
BlockStates[Block->getBlockID()];
TypeErasedDataflowAnalysisState NewBlockState =
transferBlock(BlockStates, *Block, InitEnv, Analysis);
if (OldBlockState.hasValue() &&
Analysis.isEqualTypeErased(OldBlockState.getValue().Lattice,
NewBlockState.Lattice) &&
OldBlockState->Env == NewBlockState.Env) {
// The state of `Block` didn't change after transfer so there's no need to
// revisit its successors.
continue;
}
BlockStates[Block->getBlockID()] = std::move(NewBlockState);
// Do not add unreachable successor blocks to `Worklist`.
if (Block->hasNoReturnElement())
continue;
Worklist.enqueueSuccessors(Block);
}
// FIXME: Consider evaluating unreachable basic blocks (those that have a
// state set to `llvm::None` at this point) to also analyze dead code.
return BlockStates;
}
} // namespace dataflow
} // namespace clang

View File

@ -28,3 +28,5 @@ target_link_libraries(ClangAnalysisTests
PRIVATE
LLVMTestingSupport
)
add_subdirectory(FlowSensitive)

View File

@ -0,0 +1,20 @@
set(LLVM_LINK_COMPONENTS
Support
)
add_clang_unittest(ClangAnalysisFlowSensitiveTests
TypeErasedDataflowAnalysisTest.cpp
)
clang_target_link_libraries(ClangAnalysisFlowSensitiveTests
PRIVATE
clangAnalysis
clangAnalysisFlowSensitive
clangAST
clangASTMatchers
clangBasic
clangFrontend
clangTesting
clangTooling
)

View File

@ -0,0 +1,148 @@
//===- unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp ===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/AST/Decl.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Analysis/CFG.h"
#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"
#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/ADT/StringRef.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <cassert>
#include <memory>
#include <vector>
using namespace clang;
using namespace dataflow;
template <typename AnalysisT>
class AnalysisCallback : public ast_matchers::MatchFinder::MatchCallback {
public:
void run(const ast_matchers::MatchFinder::MatchResult &Result) override {
assert(BlockStates.empty());
const auto *Func = Result.Nodes.getNodeAs<FunctionDecl>("func");
assert(Func != nullptr);
Stmt *Body = Func->getBody();
assert(Body != nullptr);
// FIXME: Consider providing a utility that returns a `CFG::BuildOptions`
// which is a good default for most clients or a utility that directly
// builds the `CFG` using default `CFG::BuildOptions`.
CFG::BuildOptions Options;
Options.AddImplicitDtors = true;
Options.AddTemporaryDtors = true;
Options.setAllAlwaysAdd();
std::unique_ptr<CFG> Cfg =
CFG::buildCFG(nullptr, Body, Result.Context, Options);
assert(Cfg != nullptr);
AnalysisT Analysis(*Result.Context);
Environment Env;
BlockStates = runDataflowAnalysis(*Cfg, Analysis, Env);
}
std::vector<
llvm::Optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>
BlockStates;
};
template <typename AnalysisT>
std::vector<llvm::Optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>
runAnalysis(llvm::StringRef Code) {
std::unique_ptr<ASTUnit> AST =
tooling::buildASTFromCodeWithArgs(Code, {"-std=c++11"});
AnalysisCallback<AnalysisT> Callback;
ast_matchers::MatchFinder Finder;
Finder.addMatcher(
ast_matchers::functionDecl(ast_matchers::hasName("target")).bind("func"),
&Callback);
Finder.matchAST(AST->getASTContext());
return Callback.BlockStates;
}
class NoopLattice {
public:
bool operator==(const NoopLattice &) const { return true; }
LatticeJoinEffect join(const NoopLattice &) {
return LatticeJoinEffect::Unchanged;
}
};
class NoopAnalysis : public DataflowAnalysis<NoopAnalysis, NoopLattice> {
public:
NoopAnalysis(ASTContext &Context)
: DataflowAnalysis<NoopAnalysis, NoopLattice>(Context) {}
static NoopLattice initialElement() { return {}; }
NoopLattice transfer(const Stmt *S, const NoopLattice &E, Environment &Env) {
return {};
}
};
TEST(DataflowAnalysisTest, NoopAnalysis) {
auto BlockStates = runAnalysis<NoopAnalysis>(R"(
void target() {}
)");
EXPECT_EQ(BlockStates.size(), 2u);
EXPECT_TRUE(BlockStates[0].hasValue());
EXPECT_TRUE(BlockStates[1].hasValue());
}
struct NonConvergingLattice {
int State;
bool operator==(const NonConvergingLattice &Other) const {
return State == Other.State;
}
LatticeJoinEffect join(const NonConvergingLattice &Other) {
if (Other.State == 0)
return LatticeJoinEffect::Unchanged;
State += Other.State;
return LatticeJoinEffect::Changed;
}
};
class NonConvergingAnalysis
: public DataflowAnalysis<NonConvergingAnalysis, NonConvergingLattice> {
public:
explicit NonConvergingAnalysis(ASTContext &Context)
: DataflowAnalysis<NonConvergingAnalysis, NonConvergingLattice>(Context) {
}
static NonConvergingLattice initialElement() { return {0}; }
NonConvergingLattice transfer(const Stmt *S, const NonConvergingLattice &E,
Environment &Env) {
return {E.State + 1};
}
};
TEST(DataflowAnalysisTest, NonConvergingAnalysis) {
auto BlockStates = runAnalysis<NonConvergingAnalysis>(R"(
void target() {
while(true) {}
}
)");
EXPECT_EQ(BlockStates.size(), 4u);
EXPECT_FALSE(BlockStates[0].hasValue());
EXPECT_TRUE(BlockStates[1].hasValue());
EXPECT_TRUE(BlockStates[2].hasValue());
EXPECT_TRUE(BlockStates[3].hasValue());
}