[clang][dataflow] Add transfer functions for logical and, or, not.

This is part of the implementation of the dataflow analysis framework.
See "[RFC] A dataflow analysis framework for Clang AST" on cfe-dev.

Reviewed-by: xazax.hun

Differential Revision: https://reviews.llvm.org/D119953
This commit is contained in:
Stanislav Gatev 2022-02-16 16:47:37 +00:00
parent 25f1d50ca5
commit dd4dde8d39
8 changed files with 334 additions and 32 deletions

View File

@ -34,8 +34,8 @@ namespace dataflow {
class DataflowAnalysisContext { class DataflowAnalysisContext {
public: public:
DataflowAnalysisContext() DataflowAnalysisContext()
: TrueVal(&takeOwnership(std::make_unique<BoolValue>())), : TrueVal(takeOwnership(std::make_unique<AtomicBoolValue>())),
FalseVal(&takeOwnership(std::make_unique<BoolValue>())) {} FalseVal(takeOwnership(std::make_unique<AtomicBoolValue>())) {}
/// Takes ownership of `Loc` and returns a reference to it. /// Takes ownership of `Loc` and returns a reference to it.
/// ///
@ -115,8 +115,8 @@ public:
/// Returns a symbolic boolean value that models a boolean literal equal to /// Returns a symbolic boolean value that models a boolean literal equal to
/// `Value`. /// `Value`.
BoolValue &getBoolLiteralValue(bool Value) const { AtomicBoolValue &getBoolLiteralValue(bool Value) const {
return Value ? *TrueVal : *FalseVal; return Value ? TrueVal : FalseVal;
} }
private: private:
@ -135,8 +135,8 @@ private:
StorageLocation *ThisPointeeLoc = nullptr; StorageLocation *ThisPointeeLoc = nullptr;
// FIXME: Add support for boolean expressions. // FIXME: Add support for boolean expressions.
BoolValue *TrueVal; AtomicBoolValue &TrueVal;
BoolValue *FalseVal; AtomicBoolValue &FalseVal;
}; };
} // namespace dataflow } // namespace dataflow

View File

@ -226,7 +226,7 @@ public:
/// Returns a symbolic boolean value that models a boolean literal equal to /// Returns a symbolic boolean value that models a boolean literal equal to
/// `Value` /// `Value`
BoolValue &getBoolLiteralValue(bool Value) const { AtomicBoolValue &getBoolLiteralValue(bool Value) const {
return DACtx->getBoolLiteralValue(Value); return DACtx->getBoolLiteralValue(Value);
} }

View File

@ -20,12 +20,23 @@
namespace clang { namespace clang {
namespace dataflow { namespace dataflow {
/// Maps statements to the environments of basic blocks that contain them.
class StmtToEnvMap {
public:
virtual ~StmtToEnvMap() = default;
/// Returns the environment of the basic block that contains `S` or nullptr if
/// there isn't one.
/// FIXME: Ensure that the result can't be null and return a const reference.
virtual const Environment *getEnvironment(const Stmt &S) const = 0;
};
/// Evaluates `S` and updates `Env` accordingly. /// Evaluates `S` and updates `Env` accordingly.
/// ///
/// Requirements: /// Requirements:
/// ///
/// The type of `S` must not be `ParenExpr`. /// The type of `S` must not be `ParenExpr`.
void transfer(const Stmt &S, Environment &Env); void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env);
} // namespace dataflow } // namespace dataflow
} // namespace clang } // namespace clang

View File

@ -28,7 +28,19 @@ namespace dataflow {
/// Base class for all values computed by abstract interpretation. /// Base class for all values computed by abstract interpretation.
class Value { class Value {
public: public:
enum class Kind { Bool, Integer, Reference, Pointer, Struct }; enum class Kind {
Integer,
Reference,
Pointer,
Struct,
// Synthetic boolean values are either atomic values or composites that
// represent conjunctions, disjunctions, and negations.
AtomicBool,
Conjunction,
Disjunction,
Negation
};
explicit Value(Kind ValKind) : ValKind(ValKind) {} explicit Value(Kind ValKind) : ValKind(ValKind) {}
@ -43,9 +55,88 @@ private:
/// Models a boolean. /// Models a boolean.
class BoolValue : public Value { class BoolValue : public Value {
public: public:
explicit BoolValue() : Value(Kind::Bool) {} explicit BoolValue(Kind ValueKind) : Value(ValueKind) {}
static bool classof(const Value *Val) { return Val->getKind() == Kind::Bool; } static bool classof(const Value *Val) {
return Val->getKind() == Kind::AtomicBool ||
Val->getKind() == Kind::Conjunction ||
Val->getKind() == Kind::Disjunction ||
Val->getKind() == Kind::Negation;
}
};
/// Models an atomic boolean.
class AtomicBoolValue : public BoolValue {
public:
explicit AtomicBoolValue() : BoolValue(Kind::AtomicBool) {}
static bool classof(const Value *Val) {
return Val->getKind() == Kind::AtomicBool;
}
};
/// Models a boolean conjunction.
// FIXME: Consider representing binary and unary boolean operations similar
// to how they are represented in the AST. This might become more pressing
// when such operations need to be added for other data types.
class ConjunctionValue : public BoolValue {
public:
explicit ConjunctionValue(BoolValue &LeftSubVal, BoolValue &RightSubVal)
: BoolValue(Kind::Conjunction), LeftSubVal(LeftSubVal),
RightSubVal(RightSubVal) {}
static bool classof(const Value *Val) {
return Val->getKind() == Kind::Conjunction;
}
/// Returns the left sub-value of the conjunction.
BoolValue &getLeftSubValue() const { return LeftSubVal; }
/// Returns the right sub-value of the conjunction.
BoolValue &getRightSubValue() const { return RightSubVal; }
private:
BoolValue &LeftSubVal;
BoolValue &RightSubVal;
};
/// Models a boolean disjunction.
class DisjunctionValue : public BoolValue {
public:
explicit DisjunctionValue(BoolValue &LeftSubVal, BoolValue &RightSubVal)
: BoolValue(Kind::Disjunction), LeftSubVal(LeftSubVal),
RightSubVal(RightSubVal) {}
static bool classof(const Value *Val) {
return Val->getKind() == Kind::Disjunction;
}
/// Returns the left sub-value of the disjunction.
BoolValue &getLeftSubValue() const { return LeftSubVal; }
/// Returns the right sub-value of the disjunction.
BoolValue &getRightSubValue() const { return RightSubVal; }
private:
BoolValue &LeftSubVal;
BoolValue &RightSubVal;
};
/// Models a boolean negation.
class NegationValue : public BoolValue {
public:
explicit NegationValue(BoolValue &SubVal)
: BoolValue(Kind::Negation), SubVal(SubVal) {}
static bool classof(const Value *Val) {
return Val->getKind() == Kind::Negation;
}
/// Returns the sub-value of the negation.
BoolValue &getSubVal() const { return SubVal; }
private:
BoolValue &SubVal;
}; };
/// Models an integer. /// Models an integer.

View File

@ -39,10 +39,12 @@ static const Expr *skipExprWithCleanups(const Expr *E) {
class TransferVisitor : public ConstStmtVisitor<TransferVisitor> { class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
public: public:
TransferVisitor(Environment &Env) : Env(Env) {} TransferVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env)
: StmtToEnv(StmtToEnv), Env(Env) {}
void VisitBinaryOperator(const BinaryOperator *S) { void VisitBinaryOperator(const BinaryOperator *S) {
if (S->getOpcode() == BO_Assign) { switch (S->getOpcode()) {
case BO_Assign: {
// The CFG does not contain `ParenExpr` as top-level statements in basic // The CFG does not contain `ParenExpr` as top-level statements in basic
// blocks, however sub-expressions can still be of that type. // blocks, however sub-expressions can still be of that type.
assert(S->getLHS() != nullptr); assert(S->getLHS() != nullptr);
@ -51,7 +53,7 @@ public:
assert(LHS != nullptr); assert(LHS != nullptr);
auto *LHSLoc = Env.getStorageLocation(*LHS, SkipPast::Reference); auto *LHSLoc = Env.getStorageLocation(*LHS, SkipPast::Reference);
if (LHSLoc == nullptr) if (LHSLoc == nullptr)
return; break;
// The CFG does not contain `ParenExpr` as top-level statements in basic // The CFG does not contain `ParenExpr` as top-level statements in basic
// blocks, however sub-expressions can still be of that type. // blocks, however sub-expressions can still be of that type.
@ -61,15 +63,57 @@ public:
assert(RHS != nullptr); assert(RHS != nullptr);
Value *RHSVal = Env.getValue(*RHS, SkipPast::Reference); Value *RHSVal = Env.getValue(*RHS, SkipPast::Reference);
if (RHSVal == nullptr) if (RHSVal == nullptr)
return; break;
// Assign a value to the storage location of the left-hand side. // Assign a value to the storage location of the left-hand side.
Env.setValue(*LHSLoc, *RHSVal); Env.setValue(*LHSLoc, *RHSVal);
// Assign a storage location for the whole expression. // Assign a storage location for the whole expression.
Env.setStorageLocation(*S, *LHSLoc); Env.setStorageLocation(*S, *LHSLoc);
break;
}
case BO_LAnd:
case BO_LOr: {
const Expr *LHS = S->getLHS();
assert(LHS != nullptr);
const Expr *RHS = S->getRHS();
assert(RHS != nullptr);
BoolValue *LHSVal =
dyn_cast_or_null<BoolValue>(Env.getValue(*LHS, SkipPast::Reference));
// `RHS` and `S` might be part of different basic blocks. We need to
// access their values from the corresponding environments.
BoolValue *RHSVal = nullptr;
const Environment *RHSEnv = StmtToEnv.getEnvironment(*RHS);
if (RHSEnv != nullptr)
RHSVal = dyn_cast_or_null<BoolValue>(
RHSEnv->getValue(*RHS, SkipPast::Reference));
// Create fresh values for unknown boolean expressions.
// FIXME: Consider providing a `GetOrCreateFresh` util in case this style
// is expected to be common or make sure that all expressions are assigned
// values and drop this.
if (LHSVal == nullptr)
LHSVal = &Env.takeOwnership(std::make_unique<AtomicBoolValue>());
if (RHSVal == nullptr)
RHSVal = &Env.takeOwnership(std::make_unique<AtomicBoolValue>());
auto &Loc = Env.createStorageLocation(*S);
Env.setStorageLocation(*S, Loc);
if (S->getOpcode() == BO_LAnd)
Env.setValue(Loc, Env.takeOwnership(std::make_unique<ConjunctionValue>(
*LHSVal, *RHSVal)));
else
Env.setValue(Loc, Env.takeOwnership(std::make_unique<DisjunctionValue>(
*LHSVal, *RHSVal)));
break;
}
default:
// FIXME: Add support for BO_EQ, BO_NE.
break;
} }
// FIXME: Add support for BO_EQ, BO_NE.
} }
void VisitDeclRefExpr(const DeclRefExpr *S) { void VisitDeclRefExpr(const DeclRefExpr *S) {
@ -212,8 +256,18 @@ public:
Env.setValue(PointerLoc, PointerVal); Env.setValue(PointerLoc, PointerVal);
break; break;
} }
case UO_LNot: {
auto *SubExprVal =
dyn_cast_or_null<BoolValue>(Env.getValue(*SubExpr, SkipPast::None));
if (SubExprVal == nullptr)
return;
auto &ExprLoc = Env.createStorageLocation(*S);
Env.setStorageLocation(*S, ExprLoc);
Env.setValue(ExprLoc, Env.takeOwnership(
std::make_unique<NegationValue>(*SubExprVal)));
}
default: default:
// FIXME: Add support for UO_LNot.
break; break;
} }
} }
@ -450,12 +504,13 @@ public:
} }
private: private:
const StmtToEnvMap &StmtToEnv;
Environment &Env; Environment &Env;
}; };
void transfer(const Stmt &S, Environment &Env) { void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env) {
assert(!isa<ParenExpr>(&S)); assert(!isa<ParenExpr>(&S));
TransferVisitor(Env).Visit(&S); TransferVisitor(StmtToEnv, Env).Visit(&S);
} }
} // namespace dataflow } // namespace dataflow

View File

@ -24,6 +24,7 @@
#include "clang/Analysis/FlowSensitive/Transfer.h" #include "clang/Analysis/FlowSensitive/Transfer.h"
#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
#include "clang/Analysis/FlowSensitive/Value.h" #include "clang/Analysis/FlowSensitive/Value.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h" #include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h" #include "llvm/ADT/Optional.h"
@ -32,6 +33,27 @@
namespace clang { namespace clang {
namespace dataflow { namespace dataflow {
class StmtToEnvMapImpl : public StmtToEnvMap {
public:
StmtToEnvMapImpl(
const ControlFlowContext &CFCtx,
llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>>
BlockToState)
: CFCtx(CFCtx), BlockToState(BlockToState) {}
const Environment *getEnvironment(const Stmt &S) const override {
auto BlockIT = CFCtx.getStmtToBlock().find(&S);
assert(BlockIT != CFCtx.getStmtToBlock().end());
const auto &State = BlockToState[BlockIT->getSecond()->getBlockID()];
assert(State.hasValue());
return &State.getValue().Env;
}
private:
const ControlFlowContext &CFCtx;
llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockToState;
};
/// Computes the input state for a given basic block by joining the output /// Computes the input state for a given basic block by joining the output
/// states of its predecessors. /// states of its predecessors.
/// ///
@ -42,7 +64,7 @@ namespace dataflow {
/// `llvm::None` represent basic blocks that are not evaluated yet. /// `llvm::None` represent basic blocks that are not evaluated yet.
static TypeErasedDataflowAnalysisState computeBlockInputState( static TypeErasedDataflowAnalysisState computeBlockInputState(
const ControlFlowContext &CFCtx, const ControlFlowContext &CFCtx,
std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates, llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates,
const CFGBlock &Block, const Environment &InitEnv, const CFGBlock &Block, const Environment &InitEnv,
TypeErasedDataflowAnalysis &Analysis) { TypeErasedDataflowAnalysis &Analysis) {
llvm::DenseSet<const CFGBlock *> Preds; llvm::DenseSet<const CFGBlock *> Preds;
@ -111,17 +133,19 @@ static TypeErasedDataflowAnalysisState computeBlockInputState(
/// Transfers `State` by evaluating `CfgStmt` in the context of `Analysis`. /// Transfers `State` by evaluating `CfgStmt` in the context of `Analysis`.
/// `HandleTransferredStmt` (if provided) will be applied to `CfgStmt`, after it /// `HandleTransferredStmt` (if provided) will be applied to `CfgStmt`, after it
/// is evaluated. /// is evaluated.
static void static void transferCFGStmt(
transferCFGStmt(const CFGStmt &CfgStmt, TypeErasedDataflowAnalysis &Analysis, const ControlFlowContext &CFCtx,
TypeErasedDataflowAnalysisState &State, llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates,
std::function<void(const CFGStmt &, const CFGStmt &CfgStmt, TypeErasedDataflowAnalysis &Analysis,
const TypeErasedDataflowAnalysisState &)> TypeErasedDataflowAnalysisState &State,
HandleTransferredStmt) { std::function<void(const CFGStmt &,
const TypeErasedDataflowAnalysisState &)>
HandleTransferredStmt) {
const Stmt *S = CfgStmt.getStmt(); const Stmt *S = CfgStmt.getStmt();
assert(S != nullptr); assert(S != nullptr);
if (Analysis.applyBuiltinTransfer()) if (Analysis.applyBuiltinTransfer())
transfer(*S, State.Env); transfer(StmtToEnvMapImpl(CFCtx, BlockStates), *S, State.Env);
Analysis.transferTypeErased(S, State.Lattice, State.Env); Analysis.transferTypeErased(S, State.Lattice, State.Env);
if (HandleTransferredStmt != nullptr) if (HandleTransferredStmt != nullptr)
@ -176,8 +200,8 @@ TypeErasedDataflowAnalysisState transferBlock(
for (const CFGElement &Element : Block) { for (const CFGElement &Element : Block) {
switch (Element.getKind()) { switch (Element.getKind()) {
case CFGElement::Statement: case CFGElement::Statement:
transferCFGStmt(*Element.getAs<CFGStmt>(), Analysis, State, transferCFGStmt(CFCtx, BlockStates, *Element.getAs<CFGStmt>(), Analysis,
HandleTransferredStmt); State, HandleTransferredStmt);
break; break;
case CFGElement::Initializer: case CFGElement::Initializer:
if (Analysis.applyBuiltinTransfer()) if (Analysis.applyBuiltinTransfer())

View File

@ -2006,6 +2006,42 @@ TEST_F(TransferTest, AssignFromBoolLiteral) {
// [[p]] // [[p]]
} }
)"; )";
runDataflow(Code,
[](llvm::ArrayRef<
std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
Results,
ASTContext &ASTCtx) {
ASSERT_THAT(Results, ElementsAre(Pair("p", _)));
const Environment &Env = Results[0].second.Env;
const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
ASSERT_THAT(FooDecl, NotNull());
const auto *FooVal = dyn_cast_or_null<AtomicBoolValue>(
Env.getValue(*FooDecl, SkipPast::None));
ASSERT_THAT(FooVal, NotNull());
const ValueDecl *BarDecl = findValueDecl(ASTCtx, "Bar");
ASSERT_THAT(BarDecl, NotNull());
const auto *BarVal = dyn_cast_or_null<AtomicBoolValue>(
Env.getValue(*BarDecl, SkipPast::None));
ASSERT_THAT(BarVal, NotNull());
EXPECT_EQ(FooVal, &Env.getBoolLiteralValue(true));
EXPECT_EQ(BarVal, &Env.getBoolLiteralValue(false));
});
}
TEST_F(TransferTest, AssignFromBoolConjunction) {
std::string Code = R"(
void target() {
bool Foo = true;
bool Bar = true;
bool Baz = (Foo) && (Bar);
// [[p]]
}
)";
runDataflow( runDataflow(
Code, [](llvm::ArrayRef< Code, [](llvm::ArrayRef<
std::pair<std::string, DataflowAnalysisState<NoopLattice>>> std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
@ -2028,9 +2064,93 @@ TEST_F(TransferTest, AssignFromBoolLiteral) {
dyn_cast_or_null<BoolValue>(Env.getValue(*BarDecl, SkipPast::None)); dyn_cast_or_null<BoolValue>(Env.getValue(*BarDecl, SkipPast::None));
ASSERT_THAT(BarVal, NotNull()); ASSERT_THAT(BarVal, NotNull());
EXPECT_EQ(FooVal, &Env.getBoolLiteralValue(true)); const ValueDecl *BazDecl = findValueDecl(ASTCtx, "Baz");
EXPECT_EQ(BarVal, &Env.getBoolLiteralValue(false)); ASSERT_THAT(BazDecl, NotNull());
const auto *BazVal = dyn_cast_or_null<ConjunctionValue>(
Env.getValue(*BazDecl, SkipPast::None));
ASSERT_THAT(BazVal, NotNull());
EXPECT_EQ(&BazVal->getLeftSubValue(), FooVal);
EXPECT_EQ(&BazVal->getRightSubValue(), BarVal);
}); });
} }
TEST_F(TransferTest, AssignFromBoolDisjunction) {
std::string Code = R"(
void target() {
bool Foo = true;
bool Bar = true;
bool Baz = (Foo) || (Bar);
// [[p]]
}
)";
runDataflow(
Code, [](llvm::ArrayRef<
std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
Results,
ASTContext &ASTCtx) {
ASSERT_THAT(Results, ElementsAre(Pair("p", _)));
const Environment &Env = Results[0].second.Env;
const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
ASSERT_THAT(FooDecl, NotNull());
const auto *FooVal =
dyn_cast_or_null<BoolValue>(Env.getValue(*FooDecl, SkipPast::None));
ASSERT_THAT(FooVal, NotNull());
const ValueDecl *BarDecl = findValueDecl(ASTCtx, "Bar");
ASSERT_THAT(BarDecl, NotNull());
const auto *BarVal =
dyn_cast_or_null<BoolValue>(Env.getValue(*BarDecl, SkipPast::None));
ASSERT_THAT(BarVal, NotNull());
const ValueDecl *BazDecl = findValueDecl(ASTCtx, "Baz");
ASSERT_THAT(BazDecl, NotNull());
const auto *BazVal = dyn_cast_or_null<DisjunctionValue>(
Env.getValue(*BazDecl, SkipPast::None));
ASSERT_THAT(BazVal, NotNull());
EXPECT_EQ(&BazVal->getLeftSubValue(), FooVal);
EXPECT_EQ(&BazVal->getRightSubValue(), BarVal);
});
}
TEST_F(TransferTest, AssignFromBoolNegation) {
std::string Code = R"(
void target() {
bool Foo = true;
bool Bar = !(Foo);
// [[p]]
}
)";
runDataflow(Code,
[](llvm::ArrayRef<
std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
Results,
ASTContext &ASTCtx) {
ASSERT_THAT(Results, ElementsAre(Pair("p", _)));
const Environment &Env = Results[0].second.Env;
const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
ASSERT_THAT(FooDecl, NotNull());
const auto *FooVal = dyn_cast_or_null<AtomicBoolValue>(
Env.getValue(*FooDecl, SkipPast::None));
ASSERT_THAT(FooVal, NotNull());
const ValueDecl *BarDecl = findValueDecl(ASTCtx, "Bar");
ASSERT_THAT(BarDecl, NotNull());
const auto *BarVal = dyn_cast_or_null<NegationValue>(
Env.getValue(*BarDecl, SkipPast::None));
ASSERT_THAT(BarVal, NotNull());
EXPECT_EQ(&BarVal->getSubVal(), FooVal);
});
}
} // namespace } // namespace

View File

@ -387,7 +387,8 @@ protected:
Code, "target", Code, "target",
[this](ASTContext &Context, Environment &Env) { [this](ASTContext &Context, Environment &Env) {
assert(HasValueTop == nullptr); assert(HasValueTop == nullptr);
HasValueTop = &Env.takeOwnership(std::make_unique<BoolValue>()); HasValueTop =
&Env.takeOwnership(std::make_unique<AtomicBoolValue>());
return OptionalIntAnalysis(Context, *HasValueTop); return OptionalIntAnalysis(Context, *HasValueTop);
}, },
[&Match]( [&Match](