[clang][dataflow] Analyze calls to in-TU functions

This patch adds initial support for context-sensitive analysis of simple functions whose definition is available in the translation unit, guarded by the `ContextSensitive` flag in the new `TransferOptions` struct. When this option is true, the `VisitCallExpr` case in the builtin transfer function has a fallthrough case which checks for a direct callee with a body. In that case, it constructs a CFG from that callee body, uses the new `pushCall` method on the `Environment` to make an environment to analyze the callee, and then calls `runDataflowAnalysis` with a `NoopAnalysis` (disabling context-sensitive analysis on that sub-analysis, to avoid problems with recursion). After the sub-analysis completes, the `Environment` from its exit block is simply assigned back to the environment at the callsite.

The `pushCall` method (which currently only supports non-method functions with some restrictions) maps the `SourceLocation`s for all the parameters to the existing source locations for the corresponding arguments from the callsite.

This patch adds a few tests to check that this context-sensitive analysis works on simple functions. More sophisticated functionality will be added later; the most important next step is to explicitly model context in some fields of the `DataflowAnalysisContext` class, as mentioned in a `FIXME` comment in the `pushCall` implementation.

Reviewed By: ymandel, xazax.hun

Differential Revision: https://reviews.llvm.org/D130306
This commit is contained in:
Sam Estep 2022-07-26 17:54:13 +00:00
parent bc165de1c7
commit 300fbf56f8
7 changed files with 224 additions and 19 deletions

View File

@ -128,6 +128,21 @@ public:
/// with a symbolic representation of the `this` pointee.
Environment(DataflowAnalysisContext &DACtx, const DeclContext &DeclCtx);
/// Creates and returns an environment to use for an inline analysis of the
/// callee. Uses the storage location from each argument in the `Call` as the
/// storage location for the corresponding parameter in the callee.
///
/// Requirements:
///
/// The callee of `Call` must be a `FunctionDecl` with a body.
///
/// The body of the callee must not reference globals.
///
/// The arguments of `Call` must map 1:1 to the callee's parameters.
///
/// Each argument of `Call` must already have a `StorageLocation`.
Environment pushCall(const CallExpr *Call) const;
/// Returns true if and only if the environment is equivalent to `Other`, i.e
/// the two environments:
/// - have the same mappings from declarations to storage locations,

View File

@ -20,6 +20,12 @@
namespace clang {
namespace dataflow {
struct TransferOptions {
/// Determines whether to analyze function bodies when present in the
/// translation unit.
bool ContextSensitive = false;
};
/// Maps statements to the environments of basic blocks that contain them.
class StmtToEnvMap {
public:
@ -36,7 +42,8 @@ public:
/// Requirements:
///
/// `S` must not be `ParenExpr` or `ExprWithCleanups`.
void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env);
void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env,
TransferOptions Options);
} // namespace dataflow
} // namespace clang

View File

@ -23,6 +23,7 @@
#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
#include "clang/Analysis/FlowSensitive/Transfer.h"
#include "llvm/ADT/Any.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Support/Error.h"
@ -36,6 +37,9 @@ struct DataflowAnalysisOptions {
// (at which point the built-in transfer functions can be simply a standalone
// analysis).
bool ApplyBuiltinTransfer = true;
/// Only has an effect if `ApplyBuiltinTransfer` is true.
TransferOptions BuiltinTransferOptions;
};
/// Type-erased lattice element container.
@ -57,7 +61,7 @@ public:
/// Deprecated. Use the `DataflowAnalysisOptions` constructor instead.
TypeErasedDataflowAnalysis(bool ApplyBuiltinTransfer)
: Options({ApplyBuiltinTransfer}) {}
: Options({ApplyBuiltinTransfer, TransferOptions{}}) {}
TypeErasedDataflowAnalysis(DataflowAnalysisOptions Options)
: Options(Options) {}
@ -90,6 +94,11 @@ public:
/// Determines whether to apply the built-in transfer functions, which model
/// the heap and stack in the `Environment`.
bool applyBuiltinTransfer() const { return Options.ApplyBuiltinTransfer; }
/// Returns the options to be passed to the built-in transfer functions.
TransferOptions builtinTransferOptions() const {
return Options.BuiltinTransferOptions;
}
};
/// Type-erased model of the program at a given program point.

View File

@ -200,6 +200,44 @@ Environment::Environment(DataflowAnalysisContext &DACtx,
}
}
Environment Environment::pushCall(const CallExpr *Call) const {
Environment Env(*this);
// FIXME: Currently this only works if the callee is never a method and the
// same callee is never analyzed from multiple separate callsites. To
// generalize this, we'll need to store a "context" field (probably a stack of
// `const CallExpr *`s) in the `Environment`, and then change the
// `DataflowAnalysisContext` class to hold a map from contexts to "frames",
// where each frame stores its own version of what are currently the
// `DeclToLoc`, `ExprToLoc`, and `ThisPointeeLoc` fields.
const auto *FuncDecl = Call->getDirectCallee();
assert(FuncDecl != nullptr);
const auto *Body = FuncDecl->getBody();
assert(Body != nullptr);
// FIXME: In order to allow the callee to reference globals, we probably need
// to call `initGlobalVars` here in some way.
auto ParamIt = FuncDecl->param_begin();
auto ParamEnd = FuncDecl->param_end();
auto ArgIt = Call->arg_begin();
auto ArgEnd = Call->arg_end();
// FIXME: Parameters don't always map to arguments 1:1; examples include
// overloaded operators implemented as member functions, and parameter packs.
for (; ArgIt != ArgEnd; ++ParamIt, ++ArgIt) {
assert(ParamIt != ParamEnd);
const VarDecl *Param = *ParamIt;
const Expr *Arg = *ArgIt;
auto *ArgLoc = Env.getStorageLocation(*Arg, SkipPast::Reference);
assert(ArgLoc != nullptr);
Env.setStorageLocation(*Param, *ArgLoc);
}
return Env;
}
bool Environment::equivalentTo(const Environment &Other,
Environment::ValueModel &Model) const {
assert(DACtx == Other.DACtx);

View File

@ -20,7 +20,9 @@
#include "clang/AST/OperationKinds.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
#include "clang/Analysis/FlowSensitive/NoopAnalysis.h"
#include "clang/Analysis/FlowSensitive/Value.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/OperatorKinds.h"
@ -46,8 +48,9 @@ static BoolValue &evaluateBooleanEquality(const Expr &LHS, const Expr &RHS,
class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
public:
TransferVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env)
: StmtToEnv(StmtToEnv), Env(Env) {}
TransferVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env,
TransferOptions Options)
: StmtToEnv(StmtToEnv), Env(Env), Options(Options) {}
void VisitBinaryOperator(const BinaryOperator *S) {
const Expr *LHS = S->getLHS();
@ -503,6 +506,35 @@ public:
if (ArgLoc == nullptr)
return;
Env.setStorageLocation(*S, *ArgLoc);
} else if (const FunctionDecl *F = S->getDirectCallee()) {
// This case is for context-sensitive analysis, which we only do if we
// have the callee body available in the translation unit.
if (!Options.ContextSensitive || F->getBody() == nullptr)
return;
auto &ASTCtx = F->getASTContext();
// FIXME: Cache these CFGs.
auto CFCtx = ControlFlowContext::build(F, F->getBody(), &ASTCtx);
// FIXME: Handle errors here and below.
assert(CFCtx);
auto ExitBlock = CFCtx->getCFG().getExit().getBlockID();
auto CalleeEnv = Env.pushCall(S);
// FIXME: Use the same analysis as the caller for the callee.
DataflowAnalysisOptions Options;
auto Analysis = NoopAnalysis(ASTCtx, Options);
auto BlockToOutputState =
dataflow::runDataflowAnalysis(*CFCtx, Analysis, CalleeEnv);
assert(BlockToOutputState);
assert(ExitBlock < BlockToOutputState->size());
auto ExitState = (*BlockToOutputState)[ExitBlock];
assert(ExitState);
Env = ExitState->Env;
}
}
@ -633,10 +665,12 @@ private:
const StmtToEnvMap &StmtToEnv;
Environment &Env;
TransferOptions Options;
};
void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env) {
TransferVisitor(StmtToEnv, Env).Visit(&S);
void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env,
TransferOptions Options) {
TransferVisitor(StmtToEnv, Env, Options).Visit(&S);
}
} // namespace dataflow

View File

@ -74,8 +74,9 @@ static int blockIndexInPredecessor(const CFGBlock &Pred,
class TerminatorVisitor : public ConstStmtVisitor<TerminatorVisitor> {
public:
TerminatorVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env,
int BlockSuccIdx)
: StmtToEnv(StmtToEnv), Env(Env), BlockSuccIdx(BlockSuccIdx) {}
int BlockSuccIdx, TransferOptions TransferOpts)
: StmtToEnv(StmtToEnv), Env(Env), BlockSuccIdx(BlockSuccIdx),
TransferOpts(TransferOpts) {}
void VisitIfStmt(const IfStmt *S) {
auto *Cond = S->getCond();
@ -118,7 +119,7 @@ private:
void extendFlowCondition(const Expr &Cond) {
// The terminator sub-expression might not be evaluated.
if (Env.getStorageLocation(Cond, SkipPast::None) == nullptr)
transfer(StmtToEnv, Cond, Env);
transfer(StmtToEnv, Cond, Env, TransferOpts);
// FIXME: The flow condition must be an r-value, so `SkipPast::None` should
// suffice.
@ -150,6 +151,7 @@ private:
const StmtToEnvMap &StmtToEnv;
Environment &Env;
int BlockSuccIdx;
TransferOptions TransferOpts;
};
/// Computes the input state for a given basic block by joining the output
@ -217,7 +219,8 @@ static TypeErasedDataflowAnalysisState computeBlockInputState(
if (const Stmt *PredTerminatorStmt = Pred->getTerminatorStmt()) {
const StmtToEnvMapImpl StmtToEnv(CFCtx, BlockStates);
TerminatorVisitor(StmtToEnv, PredState.Env,
blockIndexInPredecessor(*Pred, Block))
blockIndexInPredecessor(*Pred, Block),
Analysis.builtinTransferOptions())
.Visit(PredTerminatorStmt);
}
}
@ -253,7 +256,8 @@ static void transferCFGStmt(
assert(S != nullptr);
if (Analysis.applyBuiltinTransfer())
transfer(StmtToEnvMapImpl(CFCtx, BlockStates), *S, State.Env);
transfer(StmtToEnvMapImpl(CFCtx, BlockStates), *S, State.Env,
Analysis.builtinTransferOptions());
Analysis.transferTypeErased(S, State.Lattice, State.Env);
if (HandleTransferredStmt != nullptr)

View File

@ -39,14 +39,14 @@ using ::testing::SizeIs;
template <typename Matcher>
void runDataflow(llvm::StringRef Code, Matcher Match,
LangStandard::Kind Std = LangStandard::lang_cxx17,
bool ApplyBuiltinTransfer = true,
llvm::StringRef TargetFun = "target") {
DataflowAnalysisOptions Options,
LangStandard::Kind Std = LangStandard::lang_cxx17,
llvm::StringRef TargetFun = "target") {
ASSERT_THAT_ERROR(
test::checkDataflow<NoopAnalysis>(
Code, TargetFun,
[ApplyBuiltinTransfer](ASTContext &C, Environment &) {
return NoopAnalysis(C, ApplyBuiltinTransfer);
[Options](ASTContext &C, Environment &) {
return NoopAnalysis(C, Options);
},
[&Match](
llvm::ArrayRef<
@ -54,12 +54,19 @@ void runDataflow(llvm::StringRef Code, Matcher Match,
Results,
ASTContext &ASTCtx) { Match(Results, ASTCtx); },
{"-fsyntax-only", "-fno-delayed-template-parsing",
"-std=" +
std::string(
LangStandard::getLangStandardForKind(Std).getName())}),
"-std=" + std::string(
LangStandard::getLangStandardForKind(Std).getName())}),
llvm::Succeeded());
}
template <typename Matcher>
void runDataflow(llvm::StringRef Code, Matcher Match,
LangStandard::Kind Std = LangStandard::lang_cxx17,
bool ApplyBuiltinTransfer = true,
llvm::StringRef TargetFun = "target") {
runDataflow(Code, Match, {ApplyBuiltinTransfer, {}}, Std, TargetFun);
}
TEST(TransferTest, IntVarDeclNotTrackedWhenTransferDisabled) {
std::string Code = R"(
void target() {
@ -3862,4 +3869,95 @@ TEST(TransferTest, ForStmtBranchWithoutConditionDoesNotExtendFlowCondition) {
});
}
TEST(TransferTest, ContextSensitiveOptionDisabled) {
std::string Code = R"(
bool GiveBool();
void SetBool(bool &Var) { Var = true; }
void target() {
bool Foo = GiveBool();
SetBool(Foo);
// [[p]]
}
)";
runDataflow(Code,
[](llvm::ArrayRef<
std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
Results,
ASTContext &ASTCtx) {
ASSERT_THAT(Results, ElementsAre(Pair("p", _)));
const Environment &Env = Results[0].second.Env;
const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
ASSERT_THAT(FooDecl, NotNull());
auto &FooVal =
*cast<BoolValue>(Env.getValue(*FooDecl, SkipPast::None));
EXPECT_FALSE(Env.flowConditionImplies(FooVal));
EXPECT_FALSE(Env.flowConditionImplies(Env.makeNot(FooVal)));
},
{/*.ApplyBuiltinTransfer=*/true,
/*.BuiltinTransferOptions=*/{/*.ContextSensitive=*/false}});
}
TEST(TransferTest, ContextSensitiveSetTrue) {
std::string Code = R"(
bool GiveBool();
void SetBool(bool &Var) { Var = true; }
void target() {
bool Foo = GiveBool();
SetBool(Foo);
// [[p]]
}
)";
runDataflow(Code,
[](llvm::ArrayRef<
std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
Results,
ASTContext &ASTCtx) {
ASSERT_THAT(Results, ElementsAre(Pair("p", _)));
const Environment &Env = Results[0].second.Env;
const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
ASSERT_THAT(FooDecl, NotNull());
auto &FooVal =
*cast<BoolValue>(Env.getValue(*FooDecl, SkipPast::None));
EXPECT_TRUE(Env.flowConditionImplies(FooVal));
},
{/*.ApplyBuiltinTransfer=*/true,
/*.BuiltinTransferOptions=*/{/*.ContextSensitive=*/true}});
}
TEST(TransferTest, ContextSensitiveSetFalse) {
std::string Code = R"(
bool GiveBool();
void SetBool(bool &Var) { Var = false; }
void target() {
bool Foo = GiveBool();
SetBool(Foo);
// [[p]]
}
)";
runDataflow(Code,
[](llvm::ArrayRef<
std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
Results,
ASTContext &ASTCtx) {
ASSERT_THAT(Results, ElementsAre(Pair("p", _)));
const Environment &Env = Results[0].second.Env;
const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
ASSERT_THAT(FooDecl, NotNull());
auto &FooVal =
*cast<BoolValue>(Env.getValue(*FooDecl, SkipPast::None));
EXPECT_TRUE(Env.flowConditionImplies(Env.makeNot(FooVal)));
},
{/*.ApplyBuiltinTransfer=*/true,
/*.BuiltinTransferOptions=*/{/*.ContextSensitive=*/true}});
}
} // namespace