[clang][dataflow] Add transfer functions for logical and, or, not.

This is part of the implementation of the dataflow analysis framework.
See "[RFC] A dataflow analysis framework for Clang AST" on cfe-dev.

Reviewed-by: xazax.hun

Differential Revision: https://reviews.llvm.org/D119953
This commit is contained in:
Stanislav Gatev 2022-02-16 16:47:37 +00:00
parent 25f1d50ca5
commit dd4dde8d39
8 changed files with 334 additions and 32 deletions

View File

@ -34,8 +34,8 @@ namespace dataflow {
class DataflowAnalysisContext {
public:
DataflowAnalysisContext()
: TrueVal(&takeOwnership(std::make_unique<BoolValue>())),
FalseVal(&takeOwnership(std::make_unique<BoolValue>())) {}
: TrueVal(takeOwnership(std::make_unique<AtomicBoolValue>())),
FalseVal(takeOwnership(std::make_unique<AtomicBoolValue>())) {}
/// Takes ownership of `Loc` and returns a reference to it.
///
@ -115,8 +115,8 @@ public:
/// Returns a symbolic boolean value that models a boolean literal equal to
/// `Value`.
BoolValue &getBoolLiteralValue(bool Value) const {
return Value ? *TrueVal : *FalseVal;
AtomicBoolValue &getBoolLiteralValue(bool Value) const {
return Value ? TrueVal : FalseVal;
}
private:
@ -135,8 +135,8 @@ private:
StorageLocation *ThisPointeeLoc = nullptr;
// FIXME: Add support for boolean expressions.
BoolValue *TrueVal;
BoolValue *FalseVal;
AtomicBoolValue &TrueVal;
AtomicBoolValue &FalseVal;
};
} // namespace dataflow

View File

@ -226,7 +226,7 @@ public:
/// Returns a symbolic boolean value that models a boolean literal equal to
/// `Value`
BoolValue &getBoolLiteralValue(bool Value) const {
AtomicBoolValue &getBoolLiteralValue(bool Value) const {
return DACtx->getBoolLiteralValue(Value);
}

View File

@ -20,12 +20,23 @@
namespace clang {
namespace dataflow {
/// Maps statements to the environments of basic blocks that contain them.
class StmtToEnvMap {
public:
virtual ~StmtToEnvMap() = default;
/// Returns the environment of the basic block that contains `S` or nullptr if
/// there isn't one.
/// FIXME: Ensure that the result can't be null and return a const reference.
virtual const Environment *getEnvironment(const Stmt &S) const = 0;
};
/// Evaluates `S` and updates `Env` accordingly.
///
/// Requirements:
///
/// The type of `S` must not be `ParenExpr`.
void transfer(const Stmt &S, Environment &Env);
void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env);
} // namespace dataflow
} // namespace clang

View File

@ -28,7 +28,19 @@ namespace dataflow {
/// Base class for all values computed by abstract interpretation.
class Value {
public:
enum class Kind { Bool, Integer, Reference, Pointer, Struct };
enum class Kind {
Integer,
Reference,
Pointer,
Struct,
// Synthetic boolean values are either atomic values or composites that
// represent conjunctions, disjunctions, and negations.
AtomicBool,
Conjunction,
Disjunction,
Negation
};
explicit Value(Kind ValKind) : ValKind(ValKind) {}
@ -43,9 +55,88 @@ private:
/// Models a boolean.
class BoolValue : public Value {
public:
explicit BoolValue() : Value(Kind::Bool) {}
explicit BoolValue(Kind ValueKind) : Value(ValueKind) {}
static bool classof(const Value *Val) { return Val->getKind() == Kind::Bool; }
static bool classof(const Value *Val) {
return Val->getKind() == Kind::AtomicBool ||
Val->getKind() == Kind::Conjunction ||
Val->getKind() == Kind::Disjunction ||
Val->getKind() == Kind::Negation;
}
};
/// Models an atomic boolean.
class AtomicBoolValue : public BoolValue {
public:
explicit AtomicBoolValue() : BoolValue(Kind::AtomicBool) {}
static bool classof(const Value *Val) {
return Val->getKind() == Kind::AtomicBool;
}
};
/// Models a boolean conjunction.
// FIXME: Consider representing binary and unary boolean operations similar
// to how they are represented in the AST. This might become more pressing
// when such operations need to be added for other data types.
class ConjunctionValue : public BoolValue {
public:
explicit ConjunctionValue(BoolValue &LeftSubVal, BoolValue &RightSubVal)
: BoolValue(Kind::Conjunction), LeftSubVal(LeftSubVal),
RightSubVal(RightSubVal) {}
static bool classof(const Value *Val) {
return Val->getKind() == Kind::Conjunction;
}
/// Returns the left sub-value of the conjunction.
BoolValue &getLeftSubValue() const { return LeftSubVal; }
/// Returns the right sub-value of the conjunction.
BoolValue &getRightSubValue() const { return RightSubVal; }
private:
BoolValue &LeftSubVal;
BoolValue &RightSubVal;
};
/// Models a boolean disjunction.
class DisjunctionValue : public BoolValue {
public:
explicit DisjunctionValue(BoolValue &LeftSubVal, BoolValue &RightSubVal)
: BoolValue(Kind::Disjunction), LeftSubVal(LeftSubVal),
RightSubVal(RightSubVal) {}
static bool classof(const Value *Val) {
return Val->getKind() == Kind::Disjunction;
}
/// Returns the left sub-value of the disjunction.
BoolValue &getLeftSubValue() const { return LeftSubVal; }
/// Returns the right sub-value of the disjunction.
BoolValue &getRightSubValue() const { return RightSubVal; }
private:
BoolValue &LeftSubVal;
BoolValue &RightSubVal;
};
/// Models a boolean negation.
class NegationValue : public BoolValue {
public:
explicit NegationValue(BoolValue &SubVal)
: BoolValue(Kind::Negation), SubVal(SubVal) {}
static bool classof(const Value *Val) {
return Val->getKind() == Kind::Negation;
}
/// Returns the sub-value of the negation.
BoolValue &getSubVal() const { return SubVal; }
private:
BoolValue &SubVal;
};
/// Models an integer.

View File

@ -39,10 +39,12 @@ static const Expr *skipExprWithCleanups(const Expr *E) {
class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
public:
TransferVisitor(Environment &Env) : Env(Env) {}
TransferVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env)
: StmtToEnv(StmtToEnv), Env(Env) {}
void VisitBinaryOperator(const BinaryOperator *S) {
if (S->getOpcode() == BO_Assign) {
switch (S->getOpcode()) {
case BO_Assign: {
// The CFG does not contain `ParenExpr` as top-level statements in basic
// blocks, however sub-expressions can still be of that type.
assert(S->getLHS() != nullptr);
@ -51,7 +53,7 @@ public:
assert(LHS != nullptr);
auto *LHSLoc = Env.getStorageLocation(*LHS, SkipPast::Reference);
if (LHSLoc == nullptr)
return;
break;
// The CFG does not contain `ParenExpr` as top-level statements in basic
// blocks, however sub-expressions can still be of that type.
@ -61,15 +63,57 @@ public:
assert(RHS != nullptr);
Value *RHSVal = Env.getValue(*RHS, SkipPast::Reference);
if (RHSVal == nullptr)
return;
break;
// Assign a value to the storage location of the left-hand side.
Env.setValue(*LHSLoc, *RHSVal);
// Assign a storage location for the whole expression.
Env.setStorageLocation(*S, *LHSLoc);
break;
}
case BO_LAnd:
case BO_LOr: {
const Expr *LHS = S->getLHS();
assert(LHS != nullptr);
const Expr *RHS = S->getRHS();
assert(RHS != nullptr);
BoolValue *LHSVal =
dyn_cast_or_null<BoolValue>(Env.getValue(*LHS, SkipPast::Reference));
// `RHS` and `S` might be part of different basic blocks. We need to
// access their values from the corresponding environments.
BoolValue *RHSVal = nullptr;
const Environment *RHSEnv = StmtToEnv.getEnvironment(*RHS);
if (RHSEnv != nullptr)
RHSVal = dyn_cast_or_null<BoolValue>(
RHSEnv->getValue(*RHS, SkipPast::Reference));
// Create fresh values for unknown boolean expressions.
// FIXME: Consider providing a `GetOrCreateFresh` util in case this style
// is expected to be common or make sure that all expressions are assigned
// values and drop this.
if (LHSVal == nullptr)
LHSVal = &Env.takeOwnership(std::make_unique<AtomicBoolValue>());
if (RHSVal == nullptr)
RHSVal = &Env.takeOwnership(std::make_unique<AtomicBoolValue>());
auto &Loc = Env.createStorageLocation(*S);
Env.setStorageLocation(*S, Loc);
if (S->getOpcode() == BO_LAnd)
Env.setValue(Loc, Env.takeOwnership(std::make_unique<ConjunctionValue>(
*LHSVal, *RHSVal)));
else
Env.setValue(Loc, Env.takeOwnership(std::make_unique<DisjunctionValue>(
*LHSVal, *RHSVal)));
break;
}
default:
// FIXME: Add support for BO_EQ, BO_NE.
break;
}
// FIXME: Add support for BO_EQ, BO_NE.
}
void VisitDeclRefExpr(const DeclRefExpr *S) {
@ -212,8 +256,18 @@ public:
Env.setValue(PointerLoc, PointerVal);
break;
}
case UO_LNot: {
auto *SubExprVal =
dyn_cast_or_null<BoolValue>(Env.getValue(*SubExpr, SkipPast::None));
if (SubExprVal == nullptr)
return;
auto &ExprLoc = Env.createStorageLocation(*S);
Env.setStorageLocation(*S, ExprLoc);
Env.setValue(ExprLoc, Env.takeOwnership(
std::make_unique<NegationValue>(*SubExprVal)));
}
default:
// FIXME: Add support for UO_LNot.
break;
}
}
@ -450,12 +504,13 @@ public:
}
private:
const StmtToEnvMap &StmtToEnv;
Environment &Env;
};
void transfer(const Stmt &S, Environment &Env) {
void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env) {
assert(!isa<ParenExpr>(&S));
TransferVisitor(Env).Visit(&S);
TransferVisitor(StmtToEnv, Env).Visit(&S);
}
} // namespace dataflow

View File

@ -24,6 +24,7 @@
#include "clang/Analysis/FlowSensitive/Transfer.h"
#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
#include "clang/Analysis/FlowSensitive/Value.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
@ -32,6 +33,27 @@
namespace clang {
namespace dataflow {
class StmtToEnvMapImpl : public StmtToEnvMap {
public:
StmtToEnvMapImpl(
const ControlFlowContext &CFCtx,
llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>>
BlockToState)
: CFCtx(CFCtx), BlockToState(BlockToState) {}
const Environment *getEnvironment(const Stmt &S) const override {
auto BlockIT = CFCtx.getStmtToBlock().find(&S);
assert(BlockIT != CFCtx.getStmtToBlock().end());
const auto &State = BlockToState[BlockIT->getSecond()->getBlockID()];
assert(State.hasValue());
return &State.getValue().Env;
}
private:
const ControlFlowContext &CFCtx;
llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockToState;
};
/// Computes the input state for a given basic block by joining the output
/// states of its predecessors.
///
@ -42,7 +64,7 @@ namespace dataflow {
/// `llvm::None` represent basic blocks that are not evaluated yet.
static TypeErasedDataflowAnalysisState computeBlockInputState(
const ControlFlowContext &CFCtx,
std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates,
llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates,
const CFGBlock &Block, const Environment &InitEnv,
TypeErasedDataflowAnalysis &Analysis) {
llvm::DenseSet<const CFGBlock *> Preds;
@ -111,17 +133,19 @@ static TypeErasedDataflowAnalysisState computeBlockInputState(
/// Transfers `State` by evaluating `CfgStmt` in the context of `Analysis`.
/// `HandleTransferredStmt` (if provided) will be applied to `CfgStmt`, after it
/// is evaluated.
static void
transferCFGStmt(const CFGStmt &CfgStmt, TypeErasedDataflowAnalysis &Analysis,
TypeErasedDataflowAnalysisState &State,
std::function<void(const CFGStmt &,
const TypeErasedDataflowAnalysisState &)>
HandleTransferredStmt) {
static void transferCFGStmt(
const ControlFlowContext &CFCtx,
llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates,
const CFGStmt &CfgStmt, TypeErasedDataflowAnalysis &Analysis,
TypeErasedDataflowAnalysisState &State,
std::function<void(const CFGStmt &,
const TypeErasedDataflowAnalysisState &)>
HandleTransferredStmt) {
const Stmt *S = CfgStmt.getStmt();
assert(S != nullptr);
if (Analysis.applyBuiltinTransfer())
transfer(*S, State.Env);
transfer(StmtToEnvMapImpl(CFCtx, BlockStates), *S, State.Env);
Analysis.transferTypeErased(S, State.Lattice, State.Env);
if (HandleTransferredStmt != nullptr)
@ -176,8 +200,8 @@ TypeErasedDataflowAnalysisState transferBlock(
for (const CFGElement &Element : Block) {
switch (Element.getKind()) {
case CFGElement::Statement:
transferCFGStmt(*Element.getAs<CFGStmt>(), Analysis, State,
HandleTransferredStmt);
transferCFGStmt(CFCtx, BlockStates, *Element.getAs<CFGStmt>(), Analysis,
State, HandleTransferredStmt);
break;
case CFGElement::Initializer:
if (Analysis.applyBuiltinTransfer())

View File

@ -2006,6 +2006,42 @@ TEST_F(TransferTest, AssignFromBoolLiteral) {
// [[p]]
}
)";
runDataflow(Code,
[](llvm::ArrayRef<
std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
Results,
ASTContext &ASTCtx) {
ASSERT_THAT(Results, ElementsAre(Pair("p", _)));
const Environment &Env = Results[0].second.Env;
const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
ASSERT_THAT(FooDecl, NotNull());
const auto *FooVal = dyn_cast_or_null<AtomicBoolValue>(
Env.getValue(*FooDecl, SkipPast::None));
ASSERT_THAT(FooVal, NotNull());
const ValueDecl *BarDecl = findValueDecl(ASTCtx, "Bar");
ASSERT_THAT(BarDecl, NotNull());
const auto *BarVal = dyn_cast_or_null<AtomicBoolValue>(
Env.getValue(*BarDecl, SkipPast::None));
ASSERT_THAT(BarVal, NotNull());
EXPECT_EQ(FooVal, &Env.getBoolLiteralValue(true));
EXPECT_EQ(BarVal, &Env.getBoolLiteralValue(false));
});
}
TEST_F(TransferTest, AssignFromBoolConjunction) {
std::string Code = R"(
void target() {
bool Foo = true;
bool Bar = true;
bool Baz = (Foo) && (Bar);
// [[p]]
}
)";
runDataflow(
Code, [](llvm::ArrayRef<
std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
@ -2028,9 +2064,93 @@ TEST_F(TransferTest, AssignFromBoolLiteral) {
dyn_cast_or_null<BoolValue>(Env.getValue(*BarDecl, SkipPast::None));
ASSERT_THAT(BarVal, NotNull());
EXPECT_EQ(FooVal, &Env.getBoolLiteralValue(true));
EXPECT_EQ(BarVal, &Env.getBoolLiteralValue(false));
const ValueDecl *BazDecl = findValueDecl(ASTCtx, "Baz");
ASSERT_THAT(BazDecl, NotNull());
const auto *BazVal = dyn_cast_or_null<ConjunctionValue>(
Env.getValue(*BazDecl, SkipPast::None));
ASSERT_THAT(BazVal, NotNull());
EXPECT_EQ(&BazVal->getLeftSubValue(), FooVal);
EXPECT_EQ(&BazVal->getRightSubValue(), BarVal);
});
}
TEST_F(TransferTest, AssignFromBoolDisjunction) {
std::string Code = R"(
void target() {
bool Foo = true;
bool Bar = true;
bool Baz = (Foo) || (Bar);
// [[p]]
}
)";
runDataflow(
Code, [](llvm::ArrayRef<
std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
Results,
ASTContext &ASTCtx) {
ASSERT_THAT(Results, ElementsAre(Pair("p", _)));
const Environment &Env = Results[0].second.Env;
const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
ASSERT_THAT(FooDecl, NotNull());
const auto *FooVal =
dyn_cast_or_null<BoolValue>(Env.getValue(*FooDecl, SkipPast::None));
ASSERT_THAT(FooVal, NotNull());
const ValueDecl *BarDecl = findValueDecl(ASTCtx, "Bar");
ASSERT_THAT(BarDecl, NotNull());
const auto *BarVal =
dyn_cast_or_null<BoolValue>(Env.getValue(*BarDecl, SkipPast::None));
ASSERT_THAT(BarVal, NotNull());
const ValueDecl *BazDecl = findValueDecl(ASTCtx, "Baz");
ASSERT_THAT(BazDecl, NotNull());
const auto *BazVal = dyn_cast_or_null<DisjunctionValue>(
Env.getValue(*BazDecl, SkipPast::None));
ASSERT_THAT(BazVal, NotNull());
EXPECT_EQ(&BazVal->getLeftSubValue(), FooVal);
EXPECT_EQ(&BazVal->getRightSubValue(), BarVal);
});
}
TEST_F(TransferTest, AssignFromBoolNegation) {
std::string Code = R"(
void target() {
bool Foo = true;
bool Bar = !(Foo);
// [[p]]
}
)";
runDataflow(Code,
[](llvm::ArrayRef<
std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
Results,
ASTContext &ASTCtx) {
ASSERT_THAT(Results, ElementsAre(Pair("p", _)));
const Environment &Env = Results[0].second.Env;
const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
ASSERT_THAT(FooDecl, NotNull());
const auto *FooVal = dyn_cast_or_null<AtomicBoolValue>(
Env.getValue(*FooDecl, SkipPast::None));
ASSERT_THAT(FooVal, NotNull());
const ValueDecl *BarDecl = findValueDecl(ASTCtx, "Bar");
ASSERT_THAT(BarDecl, NotNull());
const auto *BarVal = dyn_cast_or_null<NegationValue>(
Env.getValue(*BarDecl, SkipPast::None));
ASSERT_THAT(BarVal, NotNull());
EXPECT_EQ(&BarVal->getSubVal(), FooVal);
});
}
} // namespace

View File

@ -387,7 +387,8 @@ protected:
Code, "target",
[this](ASTContext &Context, Environment &Env) {
assert(HasValueTop == nullptr);
HasValueTop = &Env.takeOwnership(std::make_unique<BoolValue>());
HasValueTop =
&Env.takeOwnership(std::make_unique<AtomicBoolValue>());
return OptionalIntAnalysis(Context, *HasValueTop);
},
[&Match](