[clang][dataflow] Enable merging distinct values in Environment::join

Make specializations of `DataflowAnalysis` extendable with domain-specific
logic for merging distinct values when joining environments. This could be
a strict lattice join or a more general widening operation.

This is part of the implementation of the dataflow analysis framework.
See "[RFC] A dataflow analysis framework for Clang AST" on cfe-dev.

Reviewed-by: xazax.hun

Differential Revision: https://reviews.llvm.org/D118038
This commit is contained in:
Stanislav Gatev 2022-01-24 13:29:06 +00:00
parent de3f81557a
commit d3597ec0aa
11 changed files with 338 additions and 43 deletions

View File

@ -42,6 +42,11 @@ namespace dataflow {
/// * `void transfer(const Stmt *, LatticeT &, Environment &)` - applies the
/// analysis transfer function for a given statement and lattice element.
///
/// `Derived` can optionally override the following members:
/// * `bool merge(QualType, const Value &, const Value &, Value &,
/// Environment &)` - joins distinct values. This could be a strict
/// lattice join or a more general widening operation.
///
/// `LatticeT` is a bounded join-semilattice that is used by `Derived` and must
/// provide the following public members:
/// * `LatticeJoinEffect join(const LatticeT &)` - joins the object and the

View File

@ -22,6 +22,7 @@
#include "llvm/ADT/DenseMap.h"
#include <cassert>
#include <memory>
#include <type_traits>
#include <utility>
#include <vector>
@ -32,15 +33,21 @@ namespace dataflow {
/// is used during dataflow analysis.
class DataflowAnalysisContext {
public:
DataflowAnalysisContext()
: TrueVal(&takeOwnership(std::make_unique<BoolValue>())),
FalseVal(&takeOwnership(std::make_unique<BoolValue>())) {}
/// Takes ownership of `Loc` and returns a reference to it.
///
/// Requirements:
///
/// `Loc` must not be null.
StorageLocation &takeOwnership(std::unique_ptr<StorageLocation> Loc) {
template <typename T>
typename std::enable_if<std::is_base_of<StorageLocation, T>::value, T &>::type
takeOwnership(std::unique_ptr<T> Loc) {
assert(Loc != nullptr);
Locs.push_back(std::move(Loc));
return *Locs.back().get();
return *cast<T>(Locs.back().get());
}
/// Takes ownership of `Val` and returns a reference to it.
@ -48,10 +55,12 @@ public:
/// Requirements:
///
/// `Val` must not be null.
Value &takeOwnership(std::unique_ptr<Value> Val) {
template <typename T>
typename std::enable_if<std::is_base_of<Value, T>::value, T &>::type
takeOwnership(std::unique_ptr<T> Val) {
assert(Val != nullptr);
Vals.push_back(std::move(Val));
return *Vals.back().get();
return *cast<T>(Vals.back().get());
}
/// Assigns `Loc` as the storage location of `D`.
@ -104,6 +113,12 @@ public:
return ThisPointeeLoc;
}
/// Maps a boolean literal onto the symbolic value that models it: `true`
/// yields the value pointed to by `TrueVal`, `false` the one pointed to by
/// `FalseVal`.
BoolValue &getBoolLiteralValue(bool Value) const {
  if (Value)
    return *TrueVal;
  return *FalseVal;
}
private:
// Storage for the state of a program.
std::vector<std::unique_ptr<StorageLocation>> Locs;
@ -120,6 +135,8 @@ private:
StorageLocation *ThisPointeeLoc = nullptr;
// FIXME: Add support for boolean expressions.
BoolValue *TrueVal;
BoolValue *FalseVal;
};
} // namespace dataflow

View File

@ -26,6 +26,9 @@
#include "clang/Analysis/FlowSensitive/Value.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include <memory>
#include <type_traits>
#include <utility>
namespace clang {
namespace dataflow {
@ -48,6 +51,25 @@ enum class SkipPast {
/// Holds the state of the program (store and heap) at a given program point.
class Environment {
public:
/// Supplements `Environment` with non-standard join operations.
///
/// `Environment::join` consults a `Merger` whenever the two environments being
/// joined assign distinct values to the same storage location.
class Merger {
public:
// Virtual so that implementations can be destroyed through a `Merger`.
virtual ~Merger() = default;
/// Given distinct `Val1` and `Val2`, modifies `MergedVal` to approximate
/// both `Val1` and `Val2`. This could be a strict lattice join or a more
/// general widening operation. If this function returns true, `MergedVal`
/// will be assigned to a storage location of type `Type` in `Env`.
///
/// Returns false when the values cannot be merged; `Environment::join` then
/// leaves the storage location without a value in the joined environment.
/// The default implementation never merges and always returns false.
///
/// Requirements:
///
/// `Val1` and `Val2` must be distinct.
virtual bool merge(QualType Type, const Value &Val1, const Value &Val2,
Value &MergedVal, Environment &Env) {
return false;
}
};
/// Creates an environment that uses `DACtx` to store objects that encompass
/// the state of a program.
explicit Environment(DataflowAnalysisContext &DACtx) : DACtx(&DACtx) {}
@ -64,7 +86,7 @@ public:
bool operator==(const Environment &) const;
LatticeJoinEffect join(const Environment &);
LatticeJoinEffect join(const Environment &, Environment::Merger &);
// FIXME: Rename `createOrGetStorageLocation` to `getOrCreateStorageLocation`,
// `getStableStorageLocation`, or something more appropriate.
@ -142,12 +164,34 @@ public:
Value *getValue(const Expr &E, SkipPast SP) const;
/// Transfers ownership of `Loc` to the analysis context and returns a
/// reference to `Loc`.
StorageLocation &takeOwnership(std::unique_ptr<StorageLocation> Loc);
/// reference to it.
///
/// Requirements:
///
/// `Loc` must not be null.
template <typename T>
typename std::enable_if<std::is_base_of<StorageLocation, T>::value, T &>::type
takeOwnership(std::unique_ptr<T> Loc) {
// Only viable when `T` derives from `StorageLocation`. Ownership is handed to
// the analysis context; the returned reference keeps the concrete type `T`.
return DACtx->takeOwnership(std::move(Loc));
}
/// Transfers ownership of `Val` to the analysis context and returns a
/// reference to `Val`.
Value &takeOwnership(std::unique_ptr<Value> Val);
/// reference to it.
///
/// Requirements:
///
/// `Val` must not be null.
template <typename T>
typename std::enable_if<std::is_base_of<Value, T>::value, T &>::type
takeOwnership(std::unique_ptr<T> Val) {
// Only viable when `T` derives from `Value`. Ownership is handed to the
// analysis context; the returned reference keeps the concrete type `T`.
return DACtx->takeOwnership(std::move(Val));
}
/// Returns a symbolic boolean value that models a boolean literal equal to
/// `Value`.
BoolValue &getBoolLiteralValue(bool Value) const {
// Delegates to the analysis context, which holds the literal values.
return DACtx->getBoolLiteralValue(Value);
}
private:
/// Creates a value appropriate for `Type`, if `Type` is supported, otherwise

View File

@ -40,7 +40,7 @@ struct TypeErasedLattice {
};
/// Type-erased base class for dataflow analyses built on a single lattice type.
class TypeErasedDataflowAnalysis {
class TypeErasedDataflowAnalysis : public Environment::Merger {
public:
virtual ~TypeErasedDataflowAnalysis() {}

View File

@ -17,6 +17,8 @@
#include "clang/AST/Decl.h"
#include "clang/Analysis/FlowSensitive/StorageLocation.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <utility>
@ -26,7 +28,7 @@ namespace dataflow {
/// Base class for all values computed by abstract interpretation.
class Value {
public:
enum class Kind { Integer, Reference, Pointer, Struct };
enum class Kind { Bool, Integer, Reference, Pointer, Struct };
explicit Value(Kind ValKind) : ValKind(ValKind) {}
@ -38,6 +40,14 @@ private:
Kind ValKind;
};
/// Models a boolean.
///
/// Carries no state of its own beyond the `Kind::Bool` tag; distinct
/// `BoolValue` objects are told apart by identity.
class BoolValue : public Value {
public:
// Tags the value as `Kind::Bool` so that `classof` can recognize it.
explicit BoolValue() : Value(Kind::Bool) {}
/// Returns true if `Val` is tagged as `Kind::Bool`. Presumably the hook used
/// by LLVM-style `isa`/`cast`/`dyn_cast` (confirm against llvm/Support/Casting.h).
static bool classof(const Value *Val) { return Val->getKind() == Kind::Bool; }
};
/// Models an integer.
class IntegerValue : public Value {
public:
@ -110,8 +120,22 @@ public:
/// Assigns `Val` as the child value for `D`.
void setChild(const ValueDecl &D, Value &Val) { Children[&D] = &Val; }
/// Looks up the synthetic property called `Name`. Yields the value assigned
/// to it, or null when no value has been assigned to that property.
Value *getProperty(llvm::StringRef Name) const {
  const auto Found = Properties.find(Name);
  if (Found == Properties.end())
    return nullptr;
  return Found->second;
}
/// Makes `Val` the value of the synthetic property called `Name`, replacing
/// any value previously assigned to that property.
void setProperty(llvm::StringRef Name, Value &Val) {
  Properties[Name] = &Val;
}
private:
llvm::DenseMap<const ValueDecl *, Value *> Children;
llvm::StringMap<Value *> Properties;
};
} // namespace dataflow

View File

@ -73,7 +73,8 @@ bool Environment::operator==(const Environment &Other) const {
return DeclToLoc == Other.DeclToLoc && LocToVal == Other.LocToVal;
}
LatticeJoinEffect Environment::join(const Environment &Other) {
LatticeJoinEffect Environment::join(const Environment &Other,
Environment::Merger &Merger) {
assert(DACtx == Other.DACtx);
auto Effect = LatticeJoinEffect::Unchanged;
@ -88,10 +89,32 @@ LatticeJoinEffect Environment::join(const Environment &Other) {
if (ExprToLocSizeBefore != ExprToLoc.size())
Effect = LatticeJoinEffect::Changed;
// FIXME: Add support for joining distinct values that are assigned to the
// same storage locations in `LocToVal` and `Other.LocToVal`.
llvm::DenseMap<const StorageLocation *, Value *> MergedLocToVal;
for (auto &Entry : LocToVal) {
const StorageLocation *Loc = Entry.first;
assert(Loc != nullptr);
Value *Val = Entry.second;
assert(Val != nullptr);
auto It = Other.LocToVal.find(Loc);
if (It == Other.LocToVal.end())
continue;
assert(It->second != nullptr);
if (It->second == Val) {
MergedLocToVal.insert({Loc, Val});
continue;
}
// FIXME: Consider destroying `MergedVal` immediately if `Merger::merge`
// returns false to avoid storing unneeded values in `DACtx`.
if (Value *MergedVal = createValue(Loc->getType()))
if (Merger.merge(Loc->getType(), *Val, *It->second, *MergedVal, *this))
MergedLocToVal.insert({Loc, MergedVal});
}
const unsigned LocToValSizeBefore = LocToVal.size();
LocToVal = intersectDenseMaps(LocToVal, Other.LocToVal);
LocToVal = std::move(MergedLocToVal);
if (LocToValSizeBefore != LocToVal.size())
Effect = LatticeJoinEffect::Changed;
@ -267,15 +290,6 @@ Value *Environment::createValueUnlessSelfReferential(
return nullptr;
}
StorageLocation &
Environment::takeOwnership(std::unique_ptr<StorageLocation> Loc) {
return DACtx->takeOwnership(std::move(Loc));
}
Value &Environment::takeOwnership(std::unique_ptr<Value> Val) {
return DACtx->takeOwnership(std::move(Val));
}
StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const {
switch (SP) {
case SkipPast::None:

View File

@ -93,7 +93,7 @@ static TypeErasedDataflowAnalysisState computeBlockInputState(
MaybePredState.getValue();
if (MaybeState.hasValue()) {
Analysis.joinTypeErased(MaybeState->Lattice, PredState.Lattice);
MaybeState->Env.join(PredState.Env);
MaybeState->Env.join(PredState.Env, Analysis);
} else {
MaybeState = PredState;
}

View File

@ -18,8 +18,10 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Testing/Support/Annotations.h"
#include <cassert>
#include <functional>
#include <memory>
#include <string>
@ -29,6 +31,7 @@
using namespace clang;
using namespace dataflow;
using namespace ast_matchers;
static bool
isAnnotationDirectlyAfterStatement(const Stmt *Stmt, unsigned AnnotationBegin,
@ -55,7 +58,6 @@ test::buildStatementToAnnotationMapping(const FunctionDecl *Func,
llvm::Annotations AnnotatedCode) {
llvm::DenseMap<const Stmt *, std::string> Result;
using namespace ast_matchers; // NOLINT: Too many names
auto StmtMatcher =
findAll(stmt(unless(anyOf(hasParent(expr()), hasParent(returnStmt()))))
.bind("stmt"));
@ -121,3 +123,11 @@ test::buildStatementToAnnotationMapping(const FunctionDecl *Func,
return Result;
}
// Finds the `ValueDecl` named `Name` in `ASTCtx`. Asserts that exactly one
// declaration carries that name and that it was bound successfully.
const ValueDecl *test::findValueDecl(ASTContext &ASTCtx, llvm::StringRef Name) {
  const auto NamedValueDecl = valueDecl(hasName(Name)).bind("v");
  const auto Matches = match(NamedValueDecl, ASTCtx);
  assert(Matches.size() == 1 && "Name must be unique");

  const auto *Found = selectFirst<ValueDecl>("v", Matches);
  assert(Found != nullptr);
  return Found;
}

View File

@ -165,6 +165,13 @@ llvm::Error checkDataflow(
VirtualMappedFiles);
}
/// Returns the `ValueDecl` for the given identifier.
///
/// Requirements:
///
/// `Name` must be unique in `ASTCtx`.
const ValueDecl *findValueDecl(ASTContext &ASTCtx, llvm::StringRef Name);
} // namespace test
} // namespace dataflow
} // namespace clang

View File

@ -22,7 +22,6 @@
#include "llvm/Testing/Support/Error.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <cassert>
#include <string>
#include <utility>
@ -30,6 +29,7 @@ namespace {
using namespace clang;
using namespace dataflow;
using namespace test;
using ::testing::_;
using ::testing::ElementsAre;
using ::testing::IsNull;
@ -58,21 +58,6 @@ protected:
}
};
/// Returns the `ValueDecl` for the given identifier.
///
/// Requirements:
///
/// `Name` must be unique in `ASTCtx`.
static const ValueDecl *findValueDecl(ASTContext &ASTCtx,
llvm::StringRef Name) {
auto TargetNodes = ast_matchers::match(
ast_matchers::valueDecl(ast_matchers::hasName(Name)).bind("v"), ASTCtx);
assert(TargetNodes.size() == 1 && "Name must be unique");
auto *const Result = ast_matchers::selectFirst<ValueDecl>("v", TargetNodes);
assert(Result != nullptr);
return Result;
}
TEST_F(TransferTest, IntVarDecl) {
std::string Code = R"(
void target() {

View File

@ -9,6 +9,7 @@
#include "NoopAnalysis.h"
#include "TestingSupport.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Analysis/CFG.h"
@ -16,6 +17,7 @@
#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
#include "clang/Analysis/FlowSensitive/Value.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
@ -35,8 +37,15 @@ namespace {
using namespace clang;
using namespace dataflow;
using namespace test;
using namespace ast_matchers;
using ::testing::_;
using ::testing::ElementsAre;
using ::testing::IsEmpty;
using ::testing::IsNull;
using ::testing::NotNull;
using ::testing::Pair;
using ::testing::Test;
using ::testing::UnorderedElementsAre;
template <typename AnalysisT>
@ -174,7 +183,7 @@ public:
}
};
class NoreturnDestructorTest : public ::testing::Test {
class NoreturnDestructorTest : public Test {
protected:
template <typename Matcher>
void runDataflow(llvm::StringRef Code, Matcher Expectations) {
@ -300,4 +309,184 @@ TEST_F(NoreturnDestructorTest, ConditionalOperatorNestedBranchReturns) {
// FIXME: Called functions at point `p` should contain only "foo".
}
/// Test analysis that models `OptionalInt` objects by attaching a synthetic
/// "has_value" property to their `StructValue`, and that merges two distinct
/// `OptionalInt` values only when they agree on that property.
class OptionalIntAnalysis
    : public DataflowAnalysis<OptionalIntAnalysis, NoopLattice> {
public:
  explicit OptionalIntAnalysis(ASTContext &Context)
      : DataflowAnalysis<OptionalIntAnalysis, NoopLattice>(Context) {}

  static NoopLattice initialElement() { return {}; }

  /// Updates `Env` for statement `S`:
  ///  * constructing an `OptionalInt` assigns a fresh value whose
  ///    "has_value" property is the `false` literal;
  ///  * an overloaded operator call on a method of `OptionalInt` assigns a
  ///    fresh value to the object operand whose "has_value" property is the
  ///    `true` literal.
  /// Any other statement leaves `Env` untouched.
  void transfer(const Stmt *S, NoopLattice &, Environment &Env) {
    auto IsOptionalIntDecl = recordDecl(hasName("OptionalInt"));
    auto IsOfOptionalIntType = hasType(IsOptionalIntDecl);

    const auto *Construction = selectFirst<CXXConstructExpr>(
        "call", match(cxxConstructExpr(IsOfOptionalIntType).bind("call"), *S,
                      getASTContext()));
    if (Construction != nullptr) {
      auto &EmptyVal =
          *cast<StructValue>(Env.createValue(Construction->getType()));
      EmptyVal.setProperty("has_value", Env.getBoolLiteralValue(false));
      Env.setValue(*Env.getStorageLocation(*Construction, SkipPast::None),
                   EmptyVal);
      return;
    }

    const auto *OperatorCall = selectFirst<CXXOperatorCallExpr>(
        "call", match(cxxOperatorCallExpr(
                          callee(cxxMethodDecl(ofClass(IsOptionalIntDecl))))
                          .bind("call"),
                      *S, getASTContext()));
    if (OperatorCall == nullptr)
      return;

    // The first argument of a member operator call is the object itself.
    assert(OperatorCall->getNumArgs() > 0);
    const auto *Object = OperatorCall->getArg(0);
    assert(Object != nullptr);

    auto *ObjectLoc =
        Env.getStorageLocation(*Object, SkipPast::ReferenceThenPointer);
    assert(ObjectLoc != nullptr);

    auto &AssignedVal = *cast<StructValue>(Env.createValue(Object->getType()));
    AssignedVal.setProperty("has_value", Env.getBoolLiteralValue(true));
    Env.setValue(*ObjectLoc, AssignedVal);
  }

  /// Merges two distinct `OptionalInt` values. Succeeds only if both carry a
  /// "has_value" property and the two properties are the very same
  /// `BoolValue`; in that case the property is copied onto `MergedVal`.
  /// Returns false for every other type and for disagreeing or missing
  /// properties.
  bool merge(QualType Type, const Value &Val1, const Value &Val2,
             Value &MergedVal, Environment &Env) final {
    const bool IsOptionalInt =
        Type->isRecordType() &&
        Type->getAsCXXRecordDecl()->getQualifiedNameAsString() == "OptionalInt";
    if (!IsOptionalInt)
      return false;

    auto HasValueOf = [](const Value &Val) {
      return cast_or_null<BoolValue>(
          cast<StructValue>(&Val)->getProperty("has_value"));
    };

    auto *HasValue1 = HasValueOf(Val1);
    if (HasValue1 == nullptr)
      return false;

    auto *HasValue2 = HasValueOf(Val2);
    if (HasValue2 == nullptr || HasValue1 != HasValue2)
      return false;

    cast<StructValue>(&MergedVal)->setProperty("has_value", *HasValue1);
    return true;
  }
};
class WideningTest : public Test {
protected:
template <typename Matcher>
void runDataflow(llvm::StringRef Code, Matcher Match) {
tooling::FileContentMappings FilesContents;
FilesContents.push_back(
std::make_pair<std::string, std::string>("widening_test_defs.h", R"(
struct OptionalInt {
OptionalInt() = default;
OptionalInt& operator=(int);
};
)"));
ASSERT_THAT_ERROR(
test::checkDataflow<OptionalIntAnalysis>(
Code, "target",
[](ASTContext &Context, Environment &Env) {
return OptionalIntAnalysis(Context);
},
[&Match](
llvm::ArrayRef<
std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
Results,
ASTContext &ASTCtx) { Match(Results, ASTCtx); },
{"-fsyntax-only", "-std=c++17"}, FilesContents),
llvm::Succeeded());
}
};
// `Foo` is assigned on only one branch, so the two values reaching the join
// disagree on "has_value"; the merge is rejected and `Foo` has no value at p3.
TEST_F(WideningTest, JoinDistinctValuesWithDistinctProperties) {
  const std::string Code = R"(
#include "widening_test_defs.h"
void target(bool Cond) {
OptionalInt Foo;
/*[[p1]]*/
if (Cond) {
Foo = 1;
/*[[p2]]*/
}
(void)0;
/*[[p3]]*/
}
)";
  runDataflow(
      Code, [](llvm::ArrayRef<
                   std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
                   Results,
               ASTContext &ASTCtx) {
        // Results are checked in the order p3, p2, p1.
        ASSERT_THAT(Results,
                    ElementsAre(Pair("p3", _), Pair("p2", _), Pair("p1", _)));
        const Environment &EnvAtP3 = Results[0].second.Env;
        const Environment &EnvAtP2 = Results[1].second.Env;
        const Environment &EnvAtP1 = Results[2].second.Env;

        const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
        ASSERT_THAT(FooDecl, NotNull());

        auto HasValueOfFoo = [FooDecl](const Environment &Env) {
          return cast<StructValue>(Env.getValue(*FooDecl, SkipPast::None))
              ->getProperty("has_value");
        };

        EXPECT_EQ(HasValueOfFoo(EnvAtP1), &EnvAtP1.getBoolLiteralValue(false));
        EXPECT_EQ(HasValueOfFoo(EnvAtP2), &EnvAtP2.getBoolLiteralValue(true));
        EXPECT_THAT(EnvAtP3.getValue(*FooDecl, SkipPast::None), IsNull());
      });
}
// `Foo` is assigned on both branches, so the two distinct values reaching the
// join agree on "has_value"; the merge succeeds and the property survives at
// p4.
TEST_F(WideningTest, JoinDistinctValuesWithSameProperties) {
  const std::string Code = R"(
#include "widening_test_defs.h"
void target(bool Cond) {
OptionalInt Foo;
/*[[p1]]*/
if (Cond) {
Foo = 1;
/*[[p2]]*/
} else {
Foo = 2;
/*[[p3]]*/
}
(void)0;
/*[[p4]]*/
}
)";
  runDataflow(
      Code, [](llvm::ArrayRef<
                   std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
                   Results,
               ASTContext &ASTCtx) {
        // Results are checked in the order p4, p3, p2, p1.
        ASSERT_THAT(Results, ElementsAre(Pair("p4", _), Pair("p3", _),
                                         Pair("p2", _), Pair("p1", _)));
        const Environment &EnvAtP4 = Results[0].second.Env;
        const Environment &EnvAtP3 = Results[1].second.Env;
        const Environment &EnvAtP2 = Results[2].second.Env;
        const Environment &EnvAtP1 = Results[3].second.Env;

        const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
        ASSERT_THAT(FooDecl, NotNull());

        auto HasValueOfFoo = [FooDecl](const Environment &Env) {
          return cast<StructValue>(Env.getValue(*FooDecl, SkipPast::None))
              ->getProperty("has_value");
        };

        EXPECT_EQ(HasValueOfFoo(EnvAtP1), &EnvAtP1.getBoolLiteralValue(false));
        EXPECT_EQ(HasValueOfFoo(EnvAtP2), &EnvAtP2.getBoolLiteralValue(true));
        EXPECT_EQ(HasValueOfFoo(EnvAtP3), &EnvAtP3.getBoolLiteralValue(true));
        EXPECT_EQ(HasValueOfFoo(EnvAtP4), &EnvAtP4.getBoolLiteralValue(true));
      });
}
} // namespace