Add a CFL Alias Analysis implementation

This provides an implementation of CFL alias analysis (including some
supporting data structures). Currently, we don't have any extremely fancy
features, sans some interprocedural analysis (i.e. no field sensitivity, etc.),
and we do best sitting behind BasicAA + TBAA. In such a configuration, we take
~0.6-0.8% of total compile time, and give ~7-8% NoAlias responses to queries
TBAA and BasicAA couldn't answer when bootstrapping LLVM. In testing this on
other projects, we've seen up to 10.5% of queries dropped by BasicAA+TBAA
answered with NoAlias by this algorithm.

Patch by George Burgess IV (with minor modifications by me -- mostly adapting
some BasicAA tests), thanks!

llvm-svn: 216970
This commit is contained in:
Hal Finkel 2014-09-02 21:43:13 +00:00
parent cdd7cbcac0
commit 7529c55c02
15 changed files with 1893 additions and 0 deletions

View File

@ -64,6 +64,13 @@ namespace llvm {
//
ImmutablePass *createBasicAliasAnalysisPass();
//===--------------------------------------------------------------------===//
//
// createCFLAliasAnalysisPass - This pass implements a set-based approach to
// alias analysis.
//
ImmutablePass *createCFLAliasAnalysisPass();
//===--------------------------------------------------------------------===//
//
/// createLibCallAliasAnalysisPass - Create an alias analysis pass that knows

View File

@ -89,6 +89,7 @@ void initializeCFGOnlyPrinterPass(PassRegistry&);
void initializeCFGOnlyViewerPass(PassRegistry&);
void initializeCFGPrinterPass(PassRegistry&);
void initializeCFGSimplifyPassPass(PassRegistry&);
void initializeCFLAliasAnalysisPass(PassRegistry&);
void initializeFlattenCFGPassPass(PassRegistry&);
void initializeStructurizeCFGPass(PassRegistry&);
void initializeCFGViewerPass(PassRegistry&);

View File

@ -62,6 +62,7 @@ namespace {
(void) llvm::createCallGraphPrinterPass();
(void) llvm::createCallGraphViewerPass();
(void) llvm::createCFGSimplificationPass();
(void) llvm::createCFLAliasAnalysisPass();
(void) llvm::createStructurizeCFGPass();
(void) llvm::createConstantMergePass();
(void) llvm::createConstantPropagationPass();

View File

@ -34,6 +34,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCFGPrinterPass(Registry);
initializeCFGOnlyViewerPass(Registry);
initializeCFGOnlyPrinterPass(Registry);
initializeCFLAliasAnalysisPass(Registry);
initializeDependenceAnalysisPass(Registry);
initializeDelinearizationPass(Registry);
initializeDominanceFrontierPass(Registry);

View File

@ -0,0 +1,981 @@
//===- CFLAliasAnalysis.cpp - CFL-Based Alias Analysis Implementation ------==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a CFL-based context-insensitive alias analysis
// algorithm. It does not depend on types. The algorithm is a mixture of the one
// described in "Demand-driven alias analysis for C" by Xin Zheng and Radu
// Rugina, and "Fast algorithms for Dyck-CFL-reachability with applications to
// Alias Analysis" by Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the
// papers, we build a graph of the uses of a variable, where each node is a
// memory location, and each edge is an action that happened on that memory
// location. The "actions" can be one of Dereference, Reference, Assign, or
// Assign.
//
// Two variables are considered as aliasing iff you can reach one value's node
// from the other value's node and the language formed by concatenating all of
// the edge labels (actions) conforms to a context-free grammar.
//
// Because this algorithm requires a graph search on each query, we execute the
// algorithm outlined in "Fast algorithms..." (mentioned above)
// in order to transform the graph into sets of variables that may alias in
// ~nlogn time (n = number of variables.), which makes queries take constant
// time.
//===----------------------------------------------------------------------===//
#include "StratifiedSets.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/None.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <forward_list>
#include <tuple>
using namespace llvm;
// Try to go from a Value* to a Function*. Never returns nullptr.
static Optional<Function *> parentFunctionOfValue(Value *);
// Returns possible functions called by the Inst* into the given
// SmallVectorImpl. Returns true if targets found, false otherwise.
// This is templated because InvokeInst/CallInst give us the same
// set of functions that we care about, and I don't like repeating
// myself.
template <typename Inst>
static bool getPossibleTargets(Inst *, SmallVectorImpl<Function *> &);
// Some instructions need to have their users tracked. Instructions like
// `add` require you to get the users of the Instruction* itself, other
// instructions like `store` require you to get the users of the first
// operand. This function gets the "proper" value to track for each
// type of instruction we support.
static Optional<Value *> getTargetValue(Instruction *);
// There are certain instructions (i.e. FenceInst, etc.) that we ignore.
// This notes that we should ignore those.
static bool hasUsefulEdges(Instruction *);
namespace {
// StratifiedInfo Attribute things.
typedef unsigned StratifiedAttr;
constexpr unsigned MaxStratifiedAttrIndex = NumStratifiedAttrs;
constexpr unsigned AttrAllIndex = 0;
constexpr unsigned AttrGlobalIndex = 1;
constexpr unsigned AttrFirstArgIndex = 2;
constexpr unsigned AttrLastArgIndex = MaxStratifiedAttrIndex;
constexpr unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex;
constexpr StratifiedAttr AttrNone = 0;
constexpr StratifiedAttr AttrAll = ~AttrNone;
// \brief StratifiedSets call for knowledge of "direction", so this is how we
// represent that locally.
enum class Level { Same, Above, Below };
// \brief Edges can be one of four "weights" -- each weight must have an inverse
// weight (Assign has Assign; Reference has Dereference).
enum class EdgeType {
// The weight assigned when assigning from or to a value. For example, in:
// %b = getelementptr %a, 0
// ...The relationships are %b assign %a, and %a assign %b. This used to be
// two edges, but having a distinction bought us nothing.
Assign,
// The edge used when we have an edge going from some handle to a Value.
// Examples of this include:
// %b = load %a (%b Dereference %a)
// %b = extractelement %a, 0 (%a Dereference %b)
Dereference,
// The edge used when our edge goes from a value to a handle that may have
// contained it at some point. Examples:
// %b = load %a (%a Reference %b)
// %b = extractelement %a, 0 (%b Reference %a)
Reference
};
// \brief Encodes the notion of a "use"
struct Edge {
// \brief Which value the edge is coming from
Value *From;
// \brief Which value the edge is pointing to
Value *To;
// \brief Edge weight
EdgeType Weight;
// \brief Whether we aliased any external values along the way that may be
// invisible to the analysis (i.e. landingpad for exceptions, calls for
// interprocedural analysis, etc.)
StratifiedAttrs AdditionalAttrs;
Edge(Value *From, Value *To, EdgeType W, StratifiedAttrs A)
: From(From), To(To), Weight(W), AdditionalAttrs(A) {}
};
// \brief Information we have about a function and would like to keep around
struct FunctionInfo {
StratifiedSets<Value *> Sets;
// Lots of functions have < 4 returns. Adjust as necessary.
SmallVector<Value *, 4> ReturnedValues;
};
struct CFLAliasAnalysis;
struct FunctionHandle : public CallbackVH {
FunctionHandle(Function *Fn, CFLAliasAnalysis *CFLAA)
: CallbackVH(Fn), CFLAA(CFLAA) {
assert(Fn != nullptr);
assert(CFLAA != nullptr);
}
virtual ~FunctionHandle() {}
virtual void deleted() override { removeSelfFromCache(); }
virtual void allUsesReplacedWith(Value *) override { removeSelfFromCache(); }
private:
CFLAliasAnalysis *CFLAA;
void removeSelfFromCache();
};
struct CFLAliasAnalysis : public ImmutablePass, public AliasAnalysis {
private:
/// \brief Cached mapping of Functions to their StratifiedSets.
/// If a function's sets are currently being built, it is marked
/// in the cache as an Optional without a value. This way, if we
/// have any kind of recursion, it is discernable from a function
/// that simply has empty sets.
DenseMap<Function *, Optional<FunctionInfo>> Cache;
std::forward_list<FunctionHandle> Handles;
public:
static char ID;
CFLAliasAnalysis() : ImmutablePass(ID) {
initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry());
}
virtual ~CFLAliasAnalysis() {}
void getAnalysisUsage(AnalysisUsage &AU) const {
AliasAnalysis::getAnalysisUsage(AU);
}
void *getAdjustedAnalysisPointer(const void *ID) override {
if (ID == &AliasAnalysis::ID)
return (AliasAnalysis *)this;
return this;
}
/// \brief Inserts the given Function into the cache.
void scan(Function *Fn);
void evict(Function *Fn) { Cache.erase(Fn); }
/// \brief Ensures that the given function is available in the cache.
/// Returns the appropriate entry from the cache.
const Optional<FunctionInfo> &ensureCached(Function *Fn) {
auto Iter = Cache.find(Fn);
if (Iter == Cache.end()) {
scan(Fn);
Iter = Cache.find(Fn);
assert(Iter != Cache.end());
assert(Iter->second.hasValue());
}
return Iter->second;
}
AliasResult query(const Location &LocA, const Location &LocB);
AliasResult alias(const Location &LocA, const Location &LocB) override {
if (LocA.Ptr == LocB.Ptr) {
if (LocA.Size == LocB.Size) {
return MustAlias;
} else {
return PartialAlias;
}
}
// Comparisons between global variables and other constants should be
// handled by BasicAA.
if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) {
return MayAlias;
}
return query(LocA, LocB);
}
void initializePass() override { InitializeAliasAnalysis(this); }
};
void FunctionHandle::removeSelfFromCache() {
assert(CFLAA != nullptr);
auto *Val = getValPtr();
CFLAA->evict(cast<Function>(Val));
setValPtr(nullptr);
}
// \brief Gets the edges our graph should have, based on an Instruction*
class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
CFLAliasAnalysis &AA;
SmallVectorImpl<Edge> &Output;
public:
GetEdgesVisitor(CFLAliasAnalysis &AA, SmallVectorImpl<Edge> &Output)
: AA(AA), Output(Output) {}
void visitInstruction(Instruction &) {
llvm_unreachable("Unsupported instruction encountered");
}
void visitCastInst(CastInst &Inst) {
Output.push_back({&Inst, Inst.getOperand(0), EdgeType::Assign, AttrNone});
}
void visitBinaryOperator(BinaryOperator &Inst) {
auto *Op1 = Inst.getOperand(0);
auto *Op2 = Inst.getOperand(1);
Output.push_back({&Inst, Op1, EdgeType::Assign, AttrNone});
Output.push_back({&Inst, Op2, EdgeType::Assign, AttrNone});
}
void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
auto *Ptr = Inst.getPointerOperand();
auto *Val = Inst.getNewValOperand();
Output.push_back({Ptr, Val, EdgeType::Dereference, AttrNone});
}
void visitAtomicRMWInst(AtomicRMWInst &Inst) {
auto *Ptr = Inst.getPointerOperand();
auto *Val = Inst.getValOperand();
Output.push_back({Ptr, Val, EdgeType::Dereference, AttrNone});
}
void visitPHINode(PHINode &Inst) {
for (unsigned I = 0, E = Inst.getNumIncomingValues(); I != E; ++I) {
Value *Val = Inst.getIncomingValue(I);
Output.push_back({&Inst, Val, EdgeType::Assign, AttrNone});
}
}
void visitGetElementPtrInst(GetElementPtrInst &Inst) {
auto *Op = Inst.getPointerOperand();
Output.push_back({&Inst, Op, EdgeType::Assign, AttrNone});
for (auto I = Inst.idx_begin(), E = Inst.idx_end(); I != E; ++I)
Output.push_back({&Inst, *I, EdgeType::Assign, AttrNone});
}
void visitSelectInst(SelectInst &Inst) {
auto *Condition = Inst.getCondition();
Output.push_back({&Inst, Condition, EdgeType::Assign, AttrNone});
auto *TrueVal = Inst.getTrueValue();
Output.push_back({&Inst, TrueVal, EdgeType::Assign, AttrNone});
auto *FalseVal = Inst.getFalseValue();
Output.push_back({&Inst, FalseVal, EdgeType::Assign, AttrNone});
}
void visitAllocaInst(AllocaInst &) {}
void visitLoadInst(LoadInst &Inst) {
auto *Ptr = Inst.getPointerOperand();
auto *Val = &Inst;
Output.push_back({Val, Ptr, EdgeType::Reference, AttrNone});
}
void visitStoreInst(StoreInst &Inst) {
auto *Ptr = Inst.getPointerOperand();
auto *Val = Inst.getValueOperand();
Output.push_back({Ptr, Val, EdgeType::Dereference, AttrNone});
}
static bool isFunctionExternal(Function *Fn) {
return Fn->isDeclaration() || !Fn->hasLocalLinkage();
}
// Gets whether the sets at Index1 above, below, or equal to the sets at
// Index2. Returns None if they are not in the same set chain.
static Optional<Level> getIndexRelation(const StratifiedSets<Value *> &Sets,
StratifiedIndex Index1,
StratifiedIndex Index2) {
if (Index1 == Index2)
return Level::Same;
const auto *Current = &Sets.getLink(Index1);
while (Current->hasBelow()) {
if (Current->Below == Index2)
return Level::Below;
Current = &Sets.getLink(Current->Below);
}
Current = &Sets.getLink(Index1);
while (Current->hasAbove()) {
if (Current->Above == Index2)
return Level::Above;
Current = &Sets.getLink(Current->Above);
}
return NoneType();
}
bool
tryInterproceduralAnalysis(const SmallVectorImpl<Function *> &Fns,
Value *FuncValue,
const iterator_range<User::op_iterator> &Args) {
constexpr unsigned ExpectedMaxArgs = 8;
constexpr unsigned MaxSupportedArgs = 50;
assert(Fns.size() > 0);
// I put this here to give us an upper bound on time taken by IPA. Is it
// really (realistically) needed? Keep in mind that we do have an n^2 algo.
if (std::distance(Args.begin(), Args.end()) > MaxSupportedArgs)
return false;
// Exit early if we'll fail anyway
for (auto *Fn : Fns) {
if (isFunctionExternal(Fn) || Fn->isVarArg())
return false;
auto &MaybeInfo = AA.ensureCached(Fn);
if (!MaybeInfo.hasValue())
return false;
}
SmallVector<Value *, ExpectedMaxArgs> Arguments(Args.begin(), Args.end());
SmallVector<StratifiedInfo, ExpectedMaxArgs> Parameters;
for (auto *Fn : Fns) {
auto &Info = *AA.ensureCached(Fn);
auto &Sets = Info.Sets;
auto &RetVals = Info.ReturnedValues;
Parameters.clear();
for (auto &Param : Fn->args()) {
auto MaybeInfo = Sets.find(&Param);
// Did a new parameter somehow get added to the function/slip by?
if (!MaybeInfo.hasValue())
return false;
Parameters.push_back(*MaybeInfo);
}
// Adding an edge from argument -> return value for each parameter that
// may alias the return value
for (unsigned I = 0, E = Parameters.size(); I != E; ++I) {
auto &ParamInfo = Parameters[I];
auto &ArgVal = Arguments[I];
bool AddEdge = false;
StratifiedAttrs Externals;
for (unsigned X = 0, XE = RetVals.size(); X != XE; ++X) {
auto MaybeInfo = Sets.find(RetVals[X]);
if (!MaybeInfo.hasValue())
return false;
auto &RetInfo = *MaybeInfo;
auto RetAttrs = Sets.getLink(RetInfo.Index).Attrs;
auto ParamAttrs = Sets.getLink(ParamInfo.Index).Attrs;
auto MaybeRelation =
getIndexRelation(Sets, ParamInfo.Index, RetInfo.Index);
if (MaybeRelation.hasValue()) {
AddEdge = true;
Externals |= RetAttrs | ParamAttrs;
}
}
if (AddEdge)
Output.push_back({FuncValue, ArgVal, EdgeType::Assign,
StratifiedAttrs().flip()});
}
if (Parameters.size() != Arguments.size())
return false;
// Adding edges between arguments for arguments that may end up aliasing
// each other. This is necessary for functions such as
// void foo(int** a, int** b) { *a = *b; }
// (Technically, the proper sets for this would be those below
// Arguments[I] and Arguments[X], but our algorithm will produce
// extremely similar, and equally correct, results either way)
for (unsigned I = 0, E = Arguments.size(); I != E; ++I) {
auto &MainVal = Arguments[I];
auto &MainInfo = Parameters[I];
auto &MainAttrs = Sets.getLink(MainInfo.Index).Attrs;
for (unsigned X = I + 1; X != E; ++X) {
auto &SubInfo = Parameters[X];
auto &SubVal = Arguments[X];
auto &SubAttrs = Sets.getLink(SubInfo.Index).Attrs;
auto MaybeRelation =
getIndexRelation(Sets, MainInfo.Index, SubInfo.Index);
if (!MaybeRelation.hasValue())
continue;
auto NewAttrs = SubAttrs | MainAttrs;
Output.push_back({MainVal, SubVal, EdgeType::Assign, NewAttrs});
}
}
}
return true;
}
template <typename InstT> void visitCallLikeInst(InstT &Inst) {
SmallVector<Function *, 4> Targets;
if (getPossibleTargets(&Inst, Targets)) {
if (tryInterproceduralAnalysis(Targets, &Inst, Inst.arg_operands()))
return;
// Cleanup from interprocedural analysis
Output.clear();
}
for (Value *V : Inst.arg_operands())
Output.push_back({&Inst, V, EdgeType::Assign, AttrAll});
}
void visitCallInst(CallInst &Inst) { visitCallLikeInst(Inst); }
void visitInvokeInst(InvokeInst &Inst) { visitCallLikeInst(Inst); }
// Because vectors/aggregates are immutable and unaddressable,
// there's nothing we can do to coax a value out of them, other
// than calling Extract{Element,Value}. We can effectively treat
// them as pointers to arbitrary memory locations we can store in
// and load from.
void visitExtractElementInst(ExtractElementInst &Inst) {
auto *Ptr = Inst.getVectorOperand();
auto *Val = &Inst;
Output.push_back({Val, Ptr, EdgeType::Reference, AttrNone});
}
void visitInsertElementInst(InsertElementInst &Inst) {
auto *Vec = Inst.getOperand(0);
auto *Val = Inst.getOperand(1);
Output.push_back({&Inst, Vec, EdgeType::Assign, AttrNone});
Output.push_back({&Inst, Val, EdgeType::Dereference, AttrNone});
}
void visitLandingPadInst(LandingPadInst &Inst) {
// Exceptions come from "nowhere", from our analysis' perspective.
// So we place the instruction its own group, noting that said group may
// alias externals
Output.push_back({&Inst, &Inst, EdgeType::Assign, AttrAll});
}
void visitInsertValueInst(InsertValueInst &Inst) {
auto *Agg = Inst.getOperand(0);
auto *Val = Inst.getOperand(1);
Output.push_back({&Inst, Agg, EdgeType::Assign, AttrNone});
Output.push_back({&Inst, Val, EdgeType::Dereference, AttrNone});
}
void visitExtractValueInst(ExtractValueInst &Inst) {
auto *Ptr = Inst.getAggregateOperand();
Output.push_back({&Inst, Ptr, EdgeType::Reference, AttrNone});
}
void visitShuffleVectorInst(ShuffleVectorInst &Inst) {
auto *From1 = Inst.getOperand(0);
auto *From2 = Inst.getOperand(1);
Output.push_back({&Inst, From1, EdgeType::Assign, AttrNone});
Output.push_back({&Inst, From2, EdgeType::Assign, AttrNone});
}
};
// For a given instruction, we need to know which Value* to get the
// users of in order to build our graph. In some cases (i.e. add),
// we simply need the Instruction*. In other cases (i.e. store),
// finding the users of the Instruction* is useless; we need to find
// the users of the first operand. This handles determining which
// value to follow for us.
//
// Note: we *need* to keep this in sync with GetEdgesVisitor. Add
// something to GetEdgesVisitor, add it here -- remove something from
// GetEdgesVisitor, remove it here.
class GetTargetValueVisitor
: public InstVisitor<GetTargetValueVisitor, Value *> {
public:
Value *visitInstruction(Instruction &Inst) { return &Inst; }
Value *visitStoreInst(StoreInst &Inst) { return Inst.getPointerOperand(); }
Value *visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
return Inst.getPointerOperand();
}
Value *visitAtomicRMWInst(AtomicRMWInst &Inst) {
return Inst.getPointerOperand();
}
Value *visitInsertElementInst(InsertElementInst &Inst) {
return Inst.getOperand(0);
}
Value *visitInsertValueInst(InsertValueInst &Inst) {
return Inst.getAggregateOperand();
}
};
// Set building requires a weighted bidirectional graph.
template <typename EdgeTypeT> class WeightedBidirectionalGraph {
public:
typedef std::size_t Node;
private:
constexpr static Node StartNode = Node(0);
struct Edge {
EdgeTypeT Weight;
Node Other;
bool operator==(const Edge &E) const {
return Weight == E.Weight && Other == E.Other;
}
bool operator!=(const Edge &E) const { return !operator==(E); }
};
struct NodeImpl {
std::vector<Edge> Edges;
};
std::vector<NodeImpl> NodeImpls;
bool inbounds(Node NodeIndex) const { return NodeIndex < NodeImpls.size(); }
const NodeImpl &getNode(Node N) const { return NodeImpls[N]; }
NodeImpl &getNode(Node N) { return NodeImpls[N]; }
public:
// ----- Various Edge iterators for the graph ----- //
// \brief Iterator for edges. Because this graph is bidirected, we don't
// allow modificaiton of the edges using this iterator. Additionally, the
// iterator becomes invalid if you add edges to or from the node you're
// getting the edges of.
struct EdgeIterator : public std::iterator<std::forward_iterator_tag,
std::tuple<EdgeTypeT, Node *>> {
EdgeIterator(const typename std::vector<Edge>::const_iterator &Iter)
: Current(Iter) {}
EdgeIterator(NodeImpl &Impl) : Current(Impl.begin()) {}
EdgeIterator &operator++() {
++Current;
return *this;
}
EdgeIterator operator++(int) {
EdgeIterator Copy(Current);
operator++();
return Copy;
}
std::tuple<EdgeTypeT, Node> &operator*() {
Store = std::make_tuple(Current->Weight, Current->Other);
return Store;
}
bool operator==(const EdgeIterator &Other) const {
return Current == Other.Current;
}
bool operator!=(const EdgeIterator &Other) const {
return !operator==(Other);
}
private:
typename std::vector<Edge>::const_iterator Current;
std::tuple<EdgeTypeT, Node> Store;
};
// Wrapper for EdgeIterator with begin()/end() calls.
struct EdgeIterable {
EdgeIterable(const std::vector<Edge> &Edges)
: BeginIter(Edges.begin()), EndIter(Edges.end()) {}
EdgeIterator begin() { return EdgeIterator(BeginIter); }
EdgeIterator end() { return EdgeIterator(EndIter); }
private:
typename std::vector<Edge>::const_iterator BeginIter;
typename std::vector<Edge>::const_iterator EndIter;
};
// ----- Actual graph-related things ----- //
WeightedBidirectionalGraph() = default;
WeightedBidirectionalGraph(WeightedBidirectionalGraph<EdgeTypeT> &&Other)
: NodeImpls(std::move(Other.NodeImpls)) {}
WeightedBidirectionalGraph<EdgeTypeT> &
operator=(WeightedBidirectionalGraph<EdgeTypeT> &&Other) {
NodeImpls = std::move(Other.NodeImpls);
return *this;
}
Node addNode() {
auto Index = NodeImpls.size();
auto NewNode = Node(Index);
NodeImpls.push_back(NodeImpl());
return NewNode;
}
void addEdge(Node From, Node To, const EdgeTypeT &Weight,
const EdgeTypeT &ReverseWeight) {
assert(inbounds(From));
assert(inbounds(To));
auto &FromNode = getNode(From);
auto &ToNode = getNode(To);
FromNode.Edges.push_back(Edge{Weight, To});
ToNode.Edges.push_back(Edge{ReverseWeight, From});
}
EdgeIterable edgesFor(const Node &N) const {
const auto &Node = getNode(N);
return EdgeIterable(Node.Edges);
}
bool empty() const { return NodeImpls.empty(); }
std::size_t size() const { return NodeImpls.size(); }
// \brief Gets an arbitrary node in the graph as a starting point for
// traversal.
Node getEntryNode() {
assert(inbounds(StartNode));
return StartNode;
}
};
typedef WeightedBidirectionalGraph<std::pair<EdgeType, StratifiedAttrs>> GraphT;
typedef DenseMap<Value *, GraphT::Node> NodeMapT;
}
// -- Setting up/registering CFLAA pass -- //
char CFLAliasAnalysis::ID = 0;
INITIALIZE_AG_PASS(CFLAliasAnalysis, AliasAnalysis, "cfl-aa",
"CFL-Based AA implementation", false, true, false)
ImmutablePass *llvm::createCFLAliasAnalysisPass() {
return new CFLAliasAnalysis();
}
//===----------------------------------------------------------------------===//
// Function declarations that require types defined in the namespace above
//===----------------------------------------------------------------------===//
// Given an argument number, returns the appropriate Attr index to set.
static StratifiedAttr argNumberToAttrIndex(StratifiedAttr);
// Given a Value, potentially return which AttrIndex it maps to.
static Optional<StratifiedAttr> valueToAttrIndex(Value *Val);
// Gets the inverse of a given EdgeType.
static EdgeType flipWeight(EdgeType);
// Gets edges of the given Instruction*, writing them to the SmallVector*.
static void argsToEdges(CFLAliasAnalysis &, Instruction *,
SmallVectorImpl<Edge> &);
// Gets the "Level" that one should travel in StratifiedSets
// given an EdgeType.
static Level directionOfEdgeType(EdgeType);
// Builds the graph needed for constructing the StratifiedSets for the
// given function
static void buildGraphFrom(CFLAliasAnalysis &, Function *,
SmallVectorImpl<Value *> &, NodeMapT &, GraphT &);
// Builds the graph + StratifiedSets for a function.
static FunctionInfo buildSetsFrom(CFLAliasAnalysis &, Function *);
static Optional<Function *> parentFunctionOfValue(Value *Val) {
if (auto *Inst = dyn_cast<Instruction>(Val)) {
auto *Bb = Inst->getParent();
return Bb->getParent();
}
if (auto *Arg = dyn_cast<Argument>(Val))
return Arg->getParent();
return NoneType();
}
template <typename Inst>
static bool getPossibleTargets(Inst *Call,
SmallVectorImpl<Function *> &Output) {
if (auto *Fn = Call->getCalledFunction()) {
Output.push_back(Fn);
return true;
}
// TODO: If the call is indirect, we might be able to enumerate all potential
// targets of the call and return them, rather than just failing.
return false;
}
static Optional<Value *> getTargetValue(Instruction *Inst) {
GetTargetValueVisitor V;
return V.visit(Inst);
}
static bool hasUsefulEdges(Instruction *Inst) {
bool IsNonInvokeTerminator =
isa<TerminatorInst>(Inst) && !isa<InvokeInst>(Inst);
return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) && !IsNonInvokeTerminator;
}
static Optional<StratifiedAttr> valueToAttrIndex(Value *Val) {
if (isa<GlobalValue>(Val))
return AttrGlobalIndex;
if (auto *Arg = dyn_cast<Argument>(Val))
if (!Arg->hasNoAliasAttr())
return argNumberToAttrIndex(Arg->getArgNo());
return NoneType();
}
static StratifiedAttr argNumberToAttrIndex(unsigned ArgNum) {
if (ArgNum > AttrMaxNumArgs)
return AttrAllIndex;
return ArgNum + AttrFirstArgIndex;
}
static EdgeType flipWeight(EdgeType Initial) {
switch (Initial) {
case EdgeType::Assign:
return EdgeType::Assign;
case EdgeType::Dereference:
return EdgeType::Reference;
case EdgeType::Reference:
return EdgeType::Dereference;
}
llvm_unreachable("Incomplete coverage of EdgeType enum");
}
static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst,
SmallVectorImpl<Edge> &Output) {
GetEdgesVisitor v(Analysis, Output);
v.visit(Inst);
}
static Level directionOfEdgeType(EdgeType Weight) {
switch (Weight) {
case EdgeType::Reference:
return Level::Above;
case EdgeType::Dereference:
return Level::Below;
case EdgeType::Assign:
return Level::Same;
}
llvm_unreachable("Incomplete switch coverage");
}
// Aside: We may remove graph construction entirely, because it doesn't really
// buy us much that we don't already have. I'd like to add interprocedural
// analysis prior to this however, in case that somehow requires the graph
// produced by this for efficient execution
static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn,
SmallVectorImpl<Value *> &ReturnedValues,
NodeMapT &Map, GraphT &Graph) {
const auto findOrInsertNode = [&Map, &Graph](Value *Val) {
auto Pair = Map.insert(std::make_pair(Val, GraphT::Node()));
auto &Iter = Pair.first;
if (Pair.second) {
auto NewNode = Graph.addNode();
Iter->second = NewNode;
}
return Iter->second;
};
SmallVector<Edge, 8> Edges;
for (auto &Bb : Fn->getBasicBlockList()) {
for (auto &Inst : Bb.getInstList()) {
// We don't want the edges of most "return" instructions, but we *do* want
// to know what can be returned.
if (auto *Ret = dyn_cast<ReturnInst>(&Inst))
ReturnedValues.push_back(Ret);
if (!hasUsefulEdges(&Inst))
continue;
Edges.clear();
argsToEdges(Analysis, &Inst, Edges);
// In the case of an unused alloca (or similar), edges may be empty. Note
// that it exists so we can potentially answer NoAlias.
if (Edges.empty()) {
auto MaybeVal = getTargetValue(&Inst);
assert(MaybeVal.hasValue());
auto *Target = *MaybeVal;
findOrInsertNode(Target);
continue;
}
for (const Edge &E : Edges) {
auto To = findOrInsertNode(E.To);
auto From = findOrInsertNode(E.From);
auto FlippedWeight = flipWeight(E.Weight);
auto Attrs = E.AdditionalAttrs;
Graph.addEdge(From, To, {E.Weight, Attrs}, {FlippedWeight, Attrs});
}
}
}
}
static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
NodeMapT Map;
GraphT Graph;
SmallVector<Value *, 4> ReturnedValues;
buildGraphFrom(Analysis, Fn, ReturnedValues, Map, Graph);
DenseMap<GraphT::Node, Value *> NodeValueMap;
NodeValueMap.resize(Map.size());
for (const auto &Pair : Map)
NodeValueMap.insert({Pair.second, Pair.first});
const auto findValueOrDie = [&NodeValueMap](GraphT::Node Node) {
auto ValIter = NodeValueMap.find(Node);
assert(ValIter != NodeValueMap.end());
return ValIter->second;
};
StratifiedSetsBuilder<Value *> Builder;
SmallVector<GraphT::Node, 16> Worklist;
for (auto &Pair : Map) {
Worklist.clear();
auto *Value = Pair.first;
Builder.add(Value);
auto InitialNode = Pair.second;
Worklist.push_back(InitialNode);
while (!Worklist.empty()) {
auto Node = Worklist.pop_back_val();
auto *CurValue = findValueOrDie(Node);
if (isa<Constant>(CurValue) && !isa<GlobalValue>(CurValue))
continue;
for (const auto &EdgeTuple : Graph.edgesFor(Node)) {
auto Weight = std::get<0>(EdgeTuple);
auto Label = Weight.first;
auto &OtherNode = std::get<1>(EdgeTuple);
auto *OtherValue = findValueOrDie(OtherNode);
if (isa<Constant>(OtherValue) && !isa<GlobalValue>(OtherValue))
continue;
bool Added;
switch (directionOfEdgeType(Label)) {
case Level::Above:
Added = Builder.addAbove(CurValue, OtherValue);
break;
case Level::Below:
Added = Builder.addBelow(CurValue, OtherValue);
break;
case Level::Same:
Added = Builder.addWith(CurValue, OtherValue);
break;
}
if (Added) {
auto Aliasing = Weight.second;
if (auto MaybeCurIndex = valueToAttrIndex(CurValue))
Aliasing.set(*MaybeCurIndex);
if (auto MaybeOtherIndex = valueToAttrIndex(OtherValue))
Aliasing.set(*MaybeOtherIndex);
Builder.noteAttributes(CurValue, Aliasing);
Builder.noteAttributes(OtherValue, Aliasing);
Worklist.push_back(OtherNode);
}
}
}
}
// There are times when we end up with parameters not in our graph (i.e. if
// it's only used as the condition of a branch). Other bits of code depend on
// things that were present during construction being present in the graph.
// So, we add all present arguments here.
for (auto &Arg : Fn->args()) {
Builder.add(&Arg);
}
return {Builder.build(), std::move(ReturnedValues)};
}
void CFLAliasAnalysis::scan(Function *Fn) {
auto InsertPair = Cache.insert({Fn, Optional<FunctionInfo>()});
(void)InsertPair;
assert(InsertPair.second &&
"Trying to scan a function that has already been cached");
FunctionInfo Info(buildSetsFrom(*this, Fn));
Cache[Fn] = std::move(Info);
Handles.push_front(FunctionHandle(Fn, this));
}
AliasAnalysis::AliasResult
CFLAliasAnalysis::query(const AliasAnalysis::Location &LocA,
const AliasAnalysis::Location &LocB) {
auto *ValA = const_cast<Value *>(LocA.Ptr);
auto *ValB = const_cast<Value *>(LocB.Ptr);
Function *Fn = nullptr;
auto MaybeFnA = parentFunctionOfValue(ValA);
auto MaybeFnB = parentFunctionOfValue(ValB);
if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) {
llvm_unreachable("Don't know how to extract the parent function "
"from values A or B");
}
if (MaybeFnA.hasValue()) {
Fn = *MaybeFnA;
assert((!MaybeFnB.hasValue() || *MaybeFnB == *MaybeFnA) &&
"Interprocedural queries not supported");
} else {
Fn = *MaybeFnB;
}
assert(Fn != nullptr);
auto &MaybeInfo = ensureCached(Fn);
assert(MaybeInfo.hasValue());
auto &Sets = MaybeInfo->Sets;
auto MaybeA = Sets.find(ValA);
if (!MaybeA.hasValue())
return AliasAnalysis::MayAlias;
auto MaybeB = Sets.find(ValB);
if (!MaybeB.hasValue())
return AliasAnalysis::MayAlias;
auto SetA = *MaybeA;
auto SetB = *MaybeB;
if (SetA.Index == SetB.Index)
return AliasAnalysis::PartialAlias;
auto AttrsA = Sets.getLink(SetA.Index).Attrs;
auto AttrsB = Sets.getLink(SetB.Index).Attrs;
auto CombinedAttrs = AttrsA | AttrsB;
if (CombinedAttrs.any())
return AliasAnalysis::PartialAlias;
return AliasAnalysis::NoAlias;
}

View File

@ -11,6 +11,7 @@ add_llvm_library(LLVMAnalysis
BranchProbabilityInfo.cpp
CFG.cpp
CFGPrinter.cpp
CFLAliasAnalysis.cpp
CGSCCPassManager.cpp
CaptureTracking.cpp
CostModel.cpp

View File

@ -0,0 +1,691 @@
//===- StratifiedSets.h - Abstract stratified sets implementation. --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_STRATIFIEDSETS_H
#define LLVM_ADT_STRATIFIEDSETS_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include <bitset>
#include <cassert>
#include <cmath>
#include <limits>
#include <type_traits>
#include <vector>
namespace llvm {
// \brief An index into Stratified Sets.
typedef unsigned StratifiedIndex;
// NOTE: ^ This can't be a short -- bootstrapping clang has a case where
// ~1M sets exist.
// \brief Container of information related to a value in a StratifiedSet.
struct StratifiedInfo {
StratifiedIndex Index;
// For field sensitivity, etc. we can tack attributes on to this struct.
};
// The number of attributes that StratifiedAttrs should contain. Attributes are
// described below, and 32 was an arbitrary choice because it fits nicely in 32
// bits (because we use a bitset for StratifiedAttrs).
static constexpr unsigned NumStratifiedAttrs = 32;
// These are attributes that the users of StratifiedSets/StratifiedSetBuilders
// may use for various purposes. These also have the special property of that
// they are merged down. So, if set A is above set B, and one decides to set an
// attribute in set A, then the attribute will automatically be set in set B.
typedef std::bitset<NumStratifiedAttrs> StratifiedAttrs;
// \brief A "link" between two StratifiedSets.
struct StratifiedLink {
// \brief This is a value used to signify "does not exist" where
// the StratifiedIndex type is used. This is used instead of
// Optional<StratifiedIndex> because Optional<StratifiedIndex> would
// eat up a considerable amount of extra memory, after struct
// padding/alignment is taken into account.
static constexpr auto SetSentinel =
std::numeric_limits<StratifiedIndex>::max();
// \brief The index for the set "above" current
StratifiedIndex Above;
// \brief The link for the set "below" current
StratifiedIndex Below;
// \brief Attributes for these StratifiedSets.
StratifiedAttrs Attrs;
StratifiedLink() : Above(SetSentinel), Below(SetSentinel) {}
bool hasBelow() const { return Below != SetSentinel; }
bool hasAbove() const { return Above != SetSentinel; }
void clearBelow() { Below = SetSentinel; }
void clearAbove() { Above = SetSentinel; }
};
// \brief These are stratified sets, as described in "Fast algorithms for
// Dyck-CFL-reachability with applications to Alias Analysis" by Zhang Q, Lyu M
// R, Yuan H, and Su Z. -- in short, this is meant to represent different sets
// of Value*s. If two Value*s are in the same set, or if both sets have
// overlapping attributes, then the Value*s are said to alias.
//
// Sets may be related by position, meaning that one set may be considered as
// above or below another. In CFL Alias Analysis, this gives us an indication
// of how two variables are related; if the set of variable A is below a set
// containing variable B, then at some point, a variable that has interacted
// with B (or B itself) was either used in order to extract the variable A, or
// was used as storage of variable A.
//
// Sets may also have attributes (as noted above). These attributes are
// generally used for noting whether a variable in the set has interacted with
// a variable whose origins we don't quite know (i.e. globals/arguments), or if
// the variable may have had operations performed on it (modified in a function
// call). All attributes that exist in a set A must exist in all sets marked as
// below set A.
template <typename T> class StratifiedSets {
public:
StratifiedSets() {}
StratifiedSets(DenseMap<T, StratifiedInfo> Map,
std::vector<StratifiedLink> Links)
: Values(std::move(Map)), Links(std::move(Links)) {}
StratifiedSets(StratifiedSets<T> &&Other) { *this = std::move(Other); }
StratifiedSets &operator=(StratifiedSets<T> &&Other) {
Values = std::move(Other.Values);
Links = std::move(Other.Links);
return *this;
}
Optional<StratifiedInfo> find(const T &Elem) const {
auto Iter = Values.find(Elem);
if (Iter == Values.end()) {
return NoneType();
}
return Iter->second;
}
const StratifiedLink &getLink(StratifiedIndex Index) const {
assert(inbounds(Index));
return Links[Index];
}
private:
DenseMap<T, StratifiedInfo> Values;
std::vector<StratifiedLink> Links;
bool inbounds(StratifiedIndex Idx) const { return Idx < Links.size(); }
};
// \brief Generic Builder class that produces StratifiedSets instances.
//
// The goal of this builder is to efficiently produce correct StratifiedSets
// instances. To this end, we use a few tricks:
// > Set chains (A method for linking sets together)
// > Set remaps (A method for marking a set as an alias [irony?] of another)
//
// ==== Set chains ====
// This builder has a notion of some value A being above, below, or with some
// other value B:
// > The `A above B` relationship implies that there is a reference edge going
// from A to B. Namely, it notes that A can store anything in B's set.
// > The `A below B` relationship is the opposite of `A above B`. It implies
// that there's a dereference edge going from A to B.
// > The `A with B` relationship states that there's an assignment edge going
// from A to B, and that A and B should be treated as equals.
//
// As an example, take the following code snippet:
//
// %a = alloca i32, align 4
// %ap = alloca i32*, align 8
// %app = alloca i32**, align 8
// store %a, %ap
// store %ap, %app
// %aw = getelementptr %ap, 0
//
// Given this, the follow relations exist:
// - %a below %ap & %ap above %a
// - %ap below %app & %app above %ap
// - %aw with %ap & %ap with %aw
//
// These relations produce the following sets:
// [{%a}, {%ap, %aw}, {%app}]
//
// ...Which states that the only MayAlias relationship in the above program is
// between %ap and %aw.
//
// Life gets more complicated when we actually have logic in our programs. So,
// we either must remove this logic from our programs, or make consessions for
// it in our AA algorithms. In this case, we have decided to select the latter
// option.
//
// First complication: Conditionals
// Motivation:
// %ad = alloca int, align 4
// %a = alloca int*, align 8
// %b = alloca int*, align 8
// %bp = alloca int**, align 8
// %c = call i1 @SomeFunc()
// %k = select %c, %ad, %bp
// store %ad, %a
// store %b, %bp
//
// %k has 'with' edges to both %a and %b, which ordinarily would not be linked
// together. So, we merge the set that contains %a with the set that contains
// %b. We then recursively merge the set above %a with the set above %b, and
// the set below %a with the set below %b, etc. Ultimately, the sets for this
// program would end up like: {%ad}, {%a, %b, %k}, {%bp}, where {%ad} is below
// {%a, %b, %c} is below {%ad}.
//
// Second complication: Arbitrary casts
// Motivation:
// %ip = alloca int*, align 8
// %ipp = alloca int**, align 8
// %i = bitcast ipp to int
// store %ip, %ipp
// store %i, %ip
//
// This is impossible to construct with any of the rules above, because a set
// containing both {%i, %ipp} is supposed to exist, the set with %i is supposed
// to be below the set with %ip, and the set with %ip is supposed to be below
// the set with %ipp. Because we don't allow circular relationships like this,
// we merge all concerned sets into one. So, the above code would generate a
// single StratifiedSet: {%ip, %ipp, %i}.
//
// ==== Set remaps ====
// More of an implementation detail than anything -- when merging sets, we need
// to update the numbers of all of the elements mapped to those sets. Rather
// than doing this at each merge, we note in the BuilderLink structure that a
// remap has occurred, and use this information so we can defer renumbering set
// elements until build time.
template <typename T> class StratifiedSetsBuilder {
// \brief Represents a Stratified Set, with information about the Stratified
// Set above it, the set below it, and whether the current set has been
// remapped to another.
struct BuilderLink {
const StratifiedIndex Number;
BuilderLink(StratifiedIndex N) : Number(N) {
Remap = StratifiedLink::SetSentinel;
}
bool hasAbove() const {
assert(!isRemapped());
return Link.hasAbove();
}
bool hasBelow() const {
assert(!isRemapped());
return Link.hasBelow();
}
void setBelow(StratifiedIndex I) {
assert(!isRemapped());
Link.Below = I;
}
void setAbove(StratifiedIndex I) {
assert(!isRemapped());
Link.Above = I;
}
void clearBelow() {
assert(!isRemapped());
Link.clearBelow();
}
void clearAbove() {
assert(!isRemapped());
Link.clearAbove();
}
StratifiedIndex getBelow() const {
assert(!isRemapped());
assert(hasBelow());
return Link.Below;
}
StratifiedIndex getAbove() const {
assert(!isRemapped());
assert(hasAbove());
return Link.Above;
}
StratifiedAttrs &getAttrs() {
assert(!isRemapped());
return Link.Attrs;
}
void setAttr(unsigned index) {
assert(!isRemapped());
assert(index < NumStratifiedAttrs);
Link.Attrs.set(index);
}
void setAttrs(const StratifiedAttrs &other) {
assert(!isRemapped());
Link.Attrs |= other;
}
bool isRemapped() const { return Remap != StratifiedLink::SetSentinel; }
// \brief For initial remapping to another set
void remapTo(StratifiedIndex Other) {
assert(!isRemapped());
Remap = Other;
}
StratifiedIndex getRemapIndex() const {
assert(isRemapped());
return Remap;
}
// \brief Should only be called when we're already remapped.
void updateRemap(StratifiedIndex Other) {
assert(isRemapped());
Remap = Other;
}
// \brief Prefer the above functions to calling things directly on what's
// returned from this -- they guard against unexpected calls when the
// current BuilderLink is remapped.
const StratifiedLink &getLink() const { return Link; }
private:
StratifiedLink Link;
StratifiedIndex Remap;
};
// \brief This function performs all of the set unioning/value renumbering
// that we've been putting off, and generates a vector<StratifiedLink> that
// may be placed in a StratifiedSets instance.
void finalizeSets(std::vector<StratifiedLink> &StratLinks) {
DenseMap<StratifiedIndex, StratifiedIndex> Remaps;
for (auto &Link : Links) {
if (Link.isRemapped()) {
continue;
}
StratifiedIndex Number = StratLinks.size();
Remaps.insert({Link.Number, Number});
StratLinks.push_back(Link.getLink());
}
for (auto &Link : StratLinks) {
if (Link.hasAbove()) {
auto &Above = linksAt(Link.Above);
auto Iter = Remaps.find(Above.Number);
assert(Iter != Remaps.end());
Link.Above = Iter->second;
}
if (Link.hasBelow()) {
auto &Below = linksAt(Link.Below);
auto Iter = Remaps.find(Below.Number);
assert(Iter != Remaps.end());
Link.Below = Iter->second;
}
}
for (auto &Pair : Values) {
auto &Info = Pair.second;
auto &Link = linksAt(Info.Index);
auto Iter = Remaps.find(Link.Number);
assert(Iter != Remaps.end());
Info.Index = Iter->second;
}
}
// \brief There's a guarantee in StratifiedLink where all bits set in a
// Link.externals will be set in all Link.externals "below" it.
static void propagateAttrs(std::vector<StratifiedLink> &Links) {
const auto getHighestParentAbove = [&Links](StratifiedIndex Idx) {
const auto *Link = &Links[Idx];
while (Link->hasAbove()) {
Idx = Link->Above;
Link = &Links[Idx];
}
return Idx;
};
SmallSet<StratifiedIndex, 16> Visited;
for (unsigned I = 0, E = Links.size(); I < E; ++I) {
auto CurrentIndex = getHighestParentAbove(I);
if (!Visited.insert(CurrentIndex)) {
continue;
}
while (Links[CurrentIndex].hasBelow()) {
auto &CurrentBits = Links[CurrentIndex].Attrs;
auto NextIndex = Links[CurrentIndex].Below;
auto &NextBits = Links[NextIndex].Attrs;
NextBits |= CurrentBits;
CurrentIndex = NextIndex;
}
}
}
public:
// \brief Builds a StratifiedSet from the information we've been given since
// either construction or the prior build() call.
StratifiedSets<T> build() {
std::vector<StratifiedLink> StratLinks;
finalizeSets(StratLinks);
propagateAttrs(StratLinks);
Links.clear();
return StratifiedSets<T>(std::move(Values), std::move(StratLinks));
}
std::size_t size() const { return Values.size(); }
std::size_t numSets() const { return Links.size(); }
bool has(const T &Elem) const { return get(Elem).hasValue(); }
bool add(const T &Main) {
if (get(Main).hasValue())
return false;
auto NewIndex = getNewUnlinkedIndex();
return addAtMerging(Main, NewIndex);
}
// \brief Restructures the stratified sets as necessary to make "ToAdd" in a
// set above "Main". There are some cases where this is not possible (see
// above), so we merge them such that ToAdd and Main are in the same set.
bool addAbove(const T &Main, const T &ToAdd) {
assert(has(Main));
auto Index = *indexOf(Main);
if (!linksAt(Index).hasAbove())
addLinkAbove(Index);
auto Above = linksAt(Index).getAbove();
return addAtMerging(ToAdd, Above);
}
// \brief Restructures the stratified sets as necessary to make "ToAdd" in a
// set below "Main". There are some cases where this is not possible (see
// above), so we merge them such that ToAdd and Main are in the same set.
bool addBelow(const T &Main, const T &ToAdd) {
assert(has(Main));
auto Index = *indexOf(Main);
if (!linksAt(Index).hasBelow())
addLinkBelow(Index);
auto Below = linksAt(Index).getBelow();
return addAtMerging(ToAdd, Below);
}
bool addWith(const T &Main, const T &ToAdd) {
assert(has(Main));
auto MainIndex = *indexOf(Main);
return addAtMerging(ToAdd, MainIndex);
}
void noteAttribute(const T &Main, unsigned AttrNum) {
assert(has(Main));
assert(AttrNum < StratifiedLink::SetSentinel);
auto *Info = *get(Main);
auto &Link = linksAt(Info->Index);
Link.setAttr(AttrNum);
}
void noteAttributes(const T &Main, const StratifiedAttrs &NewAttrs) {
assert(has(Main));
auto *Info = *get(Main);
auto &Link = linksAt(Info->Index);
Link.setAttrs(NewAttrs);
}
StratifiedAttrs getAttributes(const T &Main) {
assert(has(Main));
auto *Info = *get(Main);
auto *Link = &linksAt(Info->Index);
auto Attrs = Link->getAttrs();
while (Link->hasAbove()) {
Link = &linksAt(Link->getAbove());
Attrs |= Link->getAttrs();
}
return Attrs;
}
bool getAttribute(const T &Main, unsigned AttrNum) {
assert(AttrNum < StratifiedLink::SetSentinel);
auto Attrs = getAttributes(Main);
return Attrs[AttrNum];
}
// \brief Gets the attributes that have been applied to the set that Main
// belongs to. It ignores attributes in any sets above the one that Main
// resides in.
StratifiedAttrs getRawAttributes(const T &Main) {
assert(has(Main));
auto *Info = *get(Main);
auto &Link = linksAt(Info->Index);
return Link.getAttrs();
}
// \brief Gets an attribute from the attributes that have been applied to the
// set that Main belongs to. It ignores attributes in any sets above the one
// that Main resides in.
bool getRawAttribute(const T &Main, unsigned AttrNum) {
assert(AttrNum < StratifiedLink::SetSentinel);
auto Attrs = getRawAttributes(Main);
return Attrs[AttrNum];
}
private:
DenseMap<T, StratifiedInfo> Values;
std::vector<BuilderLink> Links;
// \brief Adds the given element at the given index, merging sets if
// necessary.
bool addAtMerging(const T &ToAdd, StratifiedIndex Index) {
StratifiedInfo Info = {Index};
auto Pair = Values.insert({ToAdd, Info});
if (Pair.second)
return true;
auto &Iter = Pair.first;
auto &IterSet = linksAt(Iter->second.Index);
auto &ReqSet = linksAt(Index);
// Failed to add where we wanted to. Merge the sets.
if (&IterSet != &ReqSet)
merge(IterSet.Number, ReqSet.Number);
return false;
}
// \brief Gets the BuilderLink at the given index, taking set remapping into
// account.
BuilderLink &linksAt(StratifiedIndex Index) {
auto *Start = &Links[Index];
if (!Start->isRemapped())
return *Start;
auto *Current = Start;
while (Current->isRemapped())
Current = &Links[Current->getRemapIndex()];
auto NewRemap = Current->Number;
// Run through everything that has yet to be updated, and update them to
// remap to NewRemap
Current = Start;
while (Current->isRemapped()) {
auto *Next = &Links[Current->getRemapIndex()];
Current->updateRemap(NewRemap);
Current = Next;
}
return *Current;
}
// \brief Merges two sets into one another. Assumes that these sets are not
// already one in the same
void merge(StratifiedIndex Idx1, StratifiedIndex Idx2) {
assert(inbounds(Idx1) && inbounds(Idx2));
assert(&linksAt(Idx1) != &linksAt(Idx2) &&
"Merging a set into itself is not allowed");
// CASE 1: If the set at `Idx1` is above or below `Idx2`, we need to merge
// both the
// given sets, and all sets between them, into one.
if (tryMergeUpwards(Idx1, Idx2))
return;
if (tryMergeUpwards(Idx2, Idx1))
return;
// CASE 2: The set at `Idx1` is not in the same chain as the set at `Idx2`.
// We therefore need to merge the two chains together.
mergeDirect(Idx1, Idx2);
}
// \brief Merges two sets assuming that the set at `Idx1` is unreachable from
// traversing above or below the set at `Idx2`.
void mergeDirect(StratifiedIndex Idx1, StratifiedIndex Idx2) {
assert(inbounds(Idx1) && inbounds(Idx2));
auto *LinksInto = &linksAt(Idx1);
auto *LinksFrom = &linksAt(Idx2);
// Merging everything above LinksInto then proceeding to merge everything
// below LinksInto becomes problematic, so we go as far "up" as possible!
while (LinksInto->hasAbove() && LinksFrom->hasAbove()) {
LinksInto = &linksAt(LinksInto->getAbove());
LinksFrom = &linksAt(LinksFrom->getAbove());
}
if (LinksFrom->hasAbove()) {
LinksInto->setAbove(LinksFrom->getAbove());
auto &NewAbove = linksAt(LinksInto->getAbove());
NewAbove.setBelow(LinksInto->Number);
}
// Merging strategy:
// > If neither has links below, stop.
// > If only `LinksInto` has links below, stop.
// > If only `LinksFrom` has links below, reset `LinksInto.Below` to
// match `LinksFrom.Below`
// > If both have links above, deal with those next.
while (LinksInto->hasBelow() && LinksFrom->hasBelow()) {
auto &FromAttrs = LinksFrom->getAttrs();
LinksInto->setAttrs(FromAttrs);
// Remap needs to happen after getBelow(), but before
// assignment of LinksFrom
auto *NewLinksFrom = &linksAt(LinksFrom->getBelow());
LinksFrom->remapTo(LinksInto->Number);
LinksFrom = NewLinksFrom;
LinksInto = &linksAt(LinksInto->getBelow());
}
if (LinksFrom->hasBelow()) {
LinksInto->setBelow(LinksFrom->getBelow());
auto &NewBelow = linksAt(LinksInto->getBelow());
NewBelow.setAbove(LinksInto->Number);
}
LinksFrom->remapTo(LinksInto->Number);
}
// \brief Checks to see if lowerIndex is at a level lower than upperIndex.
// If so, it will merge lowerIndex with upperIndex (and all of the sets
// between) and return true. Otherwise, it will return false.
bool tryMergeUpwards(StratifiedIndex LowerIndex, StratifiedIndex UpperIndex) {
assert(inbounds(LowerIndex) && inbounds(UpperIndex));
auto *Lower = &linksAt(LowerIndex);
auto *Upper = &linksAt(UpperIndex);
if (Lower == Upper)
return true;
SmallVector<BuilderLink *, 8> Found;
auto *Current = Lower;
auto Attrs = Current->getAttrs();
while (Current->hasAbove() && Current != Upper) {
Found.push_back(Current);
Attrs |= Current->getAttrs();
Current = &linksAt(Current->getAbove());
}
if (Current != Upper)
return false;
Upper->setAttrs(Attrs);
if (Lower->hasBelow()) {
auto NewBelowIndex = Lower->getBelow();
Upper->setBelow(NewBelowIndex);
auto &NewBelow = linksAt(NewBelowIndex);
NewBelow.setAbove(UpperIndex);
} else {
Upper->clearBelow();
}
for (const auto &Ptr : Found)
Ptr->remapTo(Upper->Number);
return true;
}
Optional<const StratifiedInfo *> get(const T &Val) const {
auto Result = Values.find(Val);
if (Result == Values.end())
return NoneType();
return &Result->second;
}
Optional<StratifiedInfo *> get(const T &Val) {
auto Result = Values.find(Val);
if (Result == Values.end())
return NoneType();
return &Result->second;
}
Optional<StratifiedIndex> indexOf(const T &Val) {
auto MaybeVal = get(Val);
if (!MaybeVal.hasValue())
return NoneType();
auto *Info = *MaybeVal;
auto &Link = linksAt(Info->Index);
return Link.Number;
}
StratifiedIndex addLinkBelow(StratifiedIndex Set) {
auto At = addLinks();
Links[Set].setBelow(At);
Links[At].setAbove(Set);
return At;
}
StratifiedIndex addLinkAbove(StratifiedIndex Set) {
auto At = addLinks();
Links[At].setBelow(Set);
Links[Set].setAbove(At);
return At;
}
StratifiedIndex getNewUnlinkedIndex() { return addLinks(); }
StratifiedIndex addLinks() {
auto Link = Links.size();
Links.push_back(BuilderLink(Link));
return Link;
}
bool inbounds(StratifiedIndex N) const { return N >= 0 && N < Links.size(); }
};
}
#endif // LLVM_ADT_STRATIFIEDSETS_H

View File

@ -0,0 +1,21 @@
; This testcase consists of alias relations which should be completely
; resolvable by cfl-aa, but require analysis of getelementptr constant exprs.
; Derived from BasicAA/2003-12-11-ConstExprGEP.ll
; RUN: opt < %s -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
%T = type { i32, [10 x i8] }
@G = external global %T
; CHECK: Function: test
; CHECK-NOT: May:
define void @test() {
%D = getelementptr %T* @G, i64 0, i32 0
%E = getelementptr %T* @G, i64 0, i32 1, i64 5
%F = getelementptr i32* getelementptr (%T* @G, i64 0, i32 0), i64 0
%X = getelementptr [10 x i8]* getelementptr (%T* @G, i64 0, i32 1), i64 0, i64 5
ret void
}

View File

@ -0,0 +1,30 @@
; RUN: opt < %s -cfl-aa -aa-eval -print-all-alias-modref-info 2>&1 | FileCheck %s
; CFL AA currently returns PartialAlias, BasicAA returns MayAlias, both seem
; acceptable (although we might decide that we don't want PartialAlias, and if
; so, we should update this test case accordingly).
; CHECK: {{PartialAlias|MayAlias}}: double* %p.0.i.0, double* %p3
; %p3 is equal to %p.0.i.0 on the second iteration of the loop,
; so MayAlias is needed.
define void @foo([3 x [3 x double]]* noalias %p) {
entry:
%p3 = getelementptr [3 x [3 x double]]* %p, i64 0, i64 0, i64 3
br label %loop
loop:
%i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
%p.0.i.0 = getelementptr [3 x [3 x double]]* %p, i64 0, i64 %i, i64 0
store volatile double 0.0, double* %p3
store volatile double 0.1, double* %p.0.i.0
%i.next = add i64 %i, 1
%cmp = icmp slt i64 %i.next, 3
br i1 %cmp, label %loop, label %exit
exit:
ret void
}

View File

@ -0,0 +1,12 @@
; RUN: opt < %s -cfl-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
; CHECK: Function: foo:
; CHECK-NEXT: NoAlias: {}* %p, {}* %q
define void @foo({}* %p, {}* %q) {
store {} {}, {}* %p
store {} {}, {}* %q
ret void
}

View File

@ -0,0 +1,37 @@
; RUN: opt -S -tbaa -cfl-aa -gvn < %s | FileCheck -check-prefix=CFLAA %s
; RUN: opt -S -tbaa -gvn < %s | FileCheck %s
; Adapted from the BasicAA full-store-partial-alias.ll test.
; CFL AA should notice that the store stores to the entire %u object,
; so the %tmp5 load is PartialAlias with the store and suppress TBAA.
; Without CFL AA, TBAA should say that %tmp5 is NoAlias with the store.
target datalayout = "e-p:64:64:64"
%union.anon = type { double }
@u = global %union.anon { double -2.500000e-01 }, align 8
@endianness_test = global i64 1, align 8
define i32 @signbit(double %x) nounwind {
; CFLAA: ret i32 %tmp5.lobit
; CHECK: ret i32 0
entry:
%u = alloca %union.anon, align 8
%tmp9 = getelementptr inbounds %union.anon* %u, i64 0, i32 0
store double %x, double* %tmp9, align 8, !tbaa !0
%tmp2 = load i32* bitcast (i64* @endianness_test to i32*), align 8, !tbaa !3
%idxprom = sext i32 %tmp2 to i64
%tmp4 = bitcast %union.anon* %u to [2 x i32]*
%arrayidx = getelementptr inbounds [2 x i32]* %tmp4, i64 0, i64 %idxprom
%tmp5 = load i32* %arrayidx, align 4, !tbaa !3
%tmp5.lobit = lshr i32 %tmp5, 31
ret i32 %tmp5.lobit
}
!0 = metadata !{metadata !4, metadata !4, i64 0}
!1 = metadata !{metadata !"omnipotent char", metadata !2}
!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
!3 = metadata !{metadata !5, metadata !5, i64 0}
!4 = metadata !{metadata !"double", metadata !1}
!5 = metadata !{metadata !"int", metadata !1}

View File

@ -0,0 +1,17 @@
; RUN: opt < %s -cfl-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
; Derived from BasicAA/2010-09-15-GEP-SignedArithmetic.ll
target datalayout = "e-p:32:32:32"
; CHECK: 1 partial alias response
define i32 @test(i32* %tab, i32 %indvar) nounwind {
%tmp31 = mul i32 %indvar, -2
%tmp32 = add i32 %tmp31, 30
%t.5 = getelementptr i32* %tab, i32 %tmp32
%loada = load i32* %tab
store i32 0, i32* %t.5
%loadb = load i32* %tab
%rval = add i32 %loada, %loadb
ret i32 %rval
}

View File

@ -0,0 +1,39 @@
; RUN: opt < %s -cfl-aa -aa-eval -print-all-alias-modref-info 2>&1 | FileCheck %s
; When merging MustAlias and PartialAlias, merge to PartialAlias
; instead of MayAlias.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
; CHECK: PartialAlias: i16* %bigbase0, i8* %phi
define i8 @test0(i8* %base, i1 %x) {
entry:
%baseplusone = getelementptr i8* %base, i64 1
br i1 %x, label %red, label %green
red:
br label %green
green:
%phi = phi i8* [ %baseplusone, %red ], [ %base, %entry ]
store i8 0, i8* %phi
%bigbase0 = bitcast i8* %base to i16*
store i16 -1, i16* %bigbase0
%loaded = load i8* %phi
ret i8 %loaded
}
; CHECK: PartialAlias: i16* %bigbase1, i8* %sel
define i8 @test1(i8* %base, i1 %x) {
entry:
%baseplusone = getelementptr i8* %base, i64 1
%sel = select i1 %x, i8* %baseplusone, i8* %base
store i8 0, i8* %sel
%bigbase1 = bitcast i8* %base to i16*
store i16 -1, i16* %bigbase1
%loaded = load i8* %sel
ret i8 %loaded
}

View File

@ -0,0 +1,36 @@
; RUN: opt < %s -cfl-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
; Derived from (a subset of) BasicAA/phi-and-select.ll
; CHECK: Function: qux
; CHECK: NoAlias: double* %a, double* %b
; CHECK: ===== Alias Analysis Evaluator Report =====
; Two PHIs with disjoint sets of inputs.
define void @qux(i1 %m, double* noalias %x, double* noalias %y,
i1 %n, double* noalias %v, double* noalias %w) {
entry:
br i1 %m, label %true, label %false
true:
br label %exit
false:
br label %exit
exit:
%a = phi double* [ %x, %true ], [ %y, %false ]
br i1 %n, label %ntrue, label %nfalse
ntrue:
br label %nexit
nfalse:
br label %nexit
nexit:
%b = phi double* [ %v, %ntrue ], [ %w, %nfalse ]
store volatile double 0.0, double* %a
store volatile double 1.0, double* %b
ret void
}

View File

@ -0,0 +1,18 @@
; This testcase consists of alias relations which should be completely
; resolvable by cfl-aa (derived from BasicAA/2003-11-04-SimpleCases.ll).
; RUN: opt < %s -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
%T = type { i32, [10 x i8] }
; CHECK: Function: test
; CHECK-NOT: May:
define void @test(%T* %P) {
%A = getelementptr %T* %P, i64 0
%B = getelementptr %T* %P, i64 0, i32 0
%C = getelementptr %T* %P, i64 0, i32 1
%D = getelementptr %T* %P, i64 0, i32 1, i64 0
%E = getelementptr %T* %P, i64 0, i32 1, i64 5
ret void
}