forked from OSchip/llvm-project
930 lines
30 KiB
C++
930 lines
30 KiB
C++
//===- GVNSink.cpp - sink expressions into successors ---------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
/// \file GVNSink.cpp
|
|
/// This pass attempts to sink instructions into successors, reducing static
|
|
/// instruction count and enabling if-conversion.
|
|
///
|
|
/// We use a variant of global value numbering to decide what can be sunk.
|
|
/// Consider:
|
|
///
|
|
/// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ]
|
|
/// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ]
|
|
/// \ /
|
|
/// [ %e = phi i32 %a2, %c2 ]
|
|
/// [ add i32 %e, 4 ]
|
|
///
|
|
///
|
|
/// GVN would number %a1 and %c1 differently because they compute different
|
|
/// results - the VN of an instruction is a function of its opcode and the
|
|
/// transitive closure of its operands. This is the key property for hoisting
|
|
/// and CSE.
|
|
///
|
|
/// What we want when sinking however is for a numbering that is a function of
|
|
/// the *uses* of an instruction, which allows us to answer the question "if I
|
|
/// replace %a1 with %c1, will it contribute in an equivalent way to all
|
|
/// successive instructions?". The PostValueTable class in GVN provides this
|
|
/// mapping.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
#include "llvm/ADT/DenseMap.h"
|
|
#include "llvm/ADT/DenseMapInfo.h"
|
|
#include "llvm/ADT/DenseSet.h"
|
|
#include "llvm/ADT/Hashing.h"
|
|
#include "llvm/ADT/None.h"
|
|
#include "llvm/ADT/Optional.h"
|
|
#include "llvm/ADT/PostOrderIterator.h"
|
|
#include "llvm/ADT/STLExtras.h"
|
|
#include "llvm/ADT/SmallPtrSet.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/ADT/Statistic.h"
|
|
#include "llvm/ADT/StringExtras.h"
|
|
#include "llvm/Analysis/GlobalsModRef.h"
|
|
#include "llvm/Transforms/Utils/Local.h"
|
|
#include "llvm/IR/BasicBlock.h"
|
|
#include "llvm/IR/CFG.h"
|
|
#include "llvm/IR/Constants.h"
|
|
#include "llvm/IR/Function.h"
|
|
#include "llvm/IR/InstrTypes.h"
|
|
#include "llvm/IR/Instruction.h"
|
|
#include "llvm/IR/Instructions.h"
|
|
#include "llvm/IR/PassManager.h"
|
|
#include "llvm/IR/Type.h"
|
|
#include "llvm/IR/Use.h"
|
|
#include "llvm/IR/Value.h"
|
|
#include "llvm/Pass.h"
|
|
#include "llvm/Support/Allocator.h"
|
|
#include "llvm/Support/ArrayRecycler.h"
|
|
#include "llvm/Support/AtomicOrdering.h"
|
|
#include "llvm/Support/Casting.h"
|
|
#include "llvm/Support/Compiler.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
#include "llvm/Transforms/Scalar.h"
|
|
#include "llvm/Transforms/Scalar/GVN.h"
|
|
#include "llvm/Transforms/Scalar/GVNExpression.h"
|
|
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <iterator>
|
|
#include <utility>
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "gvn-sink"
|
|
|
|
STATISTIC(NumRemoved, "Number of instructions removed");
|
|
|
|
namespace llvm {
|
|
namespace GVNExpression {
|
|
|
|
LLVM_DUMP_METHOD void Expression::dump() const {
|
|
print(dbgs());
|
|
dbgs() << "\n";
|
|
}
|
|
|
|
} // end namespace GVNExpression
|
|
} // end namespace llvm
|
|
|
|
namespace {
|
|
|
|
static bool isMemoryInst(const Instruction *I) {
|
|
return isa<LoadInst>(I) || isa<StoreInst>(I) ||
|
|
(isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
|
|
(isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
|
|
}
|
|
|
|
/// Iterates through instructions in a set of blocks in reverse order from the
|
|
/// first non-terminator. For example (assume all blocks have size n):
|
|
/// LockstepReverseIterator I([B1, B2, B3]);
|
|
/// *I-- = [B1[n], B2[n], B3[n]];
|
|
/// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
|
|
/// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
|
|
/// ...
|
|
///
|
|
/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
|
|
/// to
|
|
/// determine which blocks are still going and the order they appear in the
|
|
/// list returned by operator*.
|
|
class LockstepReverseIterator {
|
|
ArrayRef<BasicBlock *> Blocks;
|
|
SmallSetVector<BasicBlock *, 4> ActiveBlocks;
|
|
SmallVector<Instruction *, 4> Insts;
|
|
bool Fail;
|
|
|
|
public:
|
|
LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
|
|
reset();
|
|
}
|
|
|
|
void reset() {
|
|
Fail = false;
|
|
ActiveBlocks.clear();
|
|
for (BasicBlock *BB : Blocks)
|
|
ActiveBlocks.insert(BB);
|
|
Insts.clear();
|
|
for (BasicBlock *BB : Blocks) {
|
|
if (BB->size() <= 1) {
|
|
// Block wasn't big enough - only contained a terminator.
|
|
ActiveBlocks.remove(BB);
|
|
continue;
|
|
}
|
|
Insts.push_back(BB->getTerminator()->getPrevNode());
|
|
}
|
|
if (Insts.empty())
|
|
Fail = true;
|
|
}
|
|
|
|
bool isValid() const { return !Fail; }
|
|
ArrayRef<Instruction *> operator*() const { return Insts; }
|
|
|
|
// Note: This needs to return a SmallSetVector as the elements of
|
|
// ActiveBlocks will be later copied to Blocks using std::copy. The
|
|
// resultant order of elements in Blocks needs to be deterministic.
|
|
// Using SmallPtrSet instead causes non-deterministic order while
|
|
// copying. And we cannot simply sort Blocks as they need to match the
|
|
// corresponding Values.
|
|
SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }
|
|
|
|
void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
|
|
for (auto II = Insts.begin(); II != Insts.end();) {
|
|
if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) ==
|
|
Blocks.end()) {
|
|
ActiveBlocks.remove((*II)->getParent());
|
|
II = Insts.erase(II);
|
|
} else {
|
|
++II;
|
|
}
|
|
}
|
|
}
|
|
|
|
void operator--() {
|
|
if (Fail)
|
|
return;
|
|
SmallVector<Instruction *, 4> NewInsts;
|
|
for (auto *Inst : Insts) {
|
|
if (Inst == &Inst->getParent()->front())
|
|
ActiveBlocks.remove(Inst->getParent());
|
|
else
|
|
NewInsts.push_back(Inst->getPrevNode());
|
|
}
|
|
if (NewInsts.empty()) {
|
|
Fail = true;
|
|
return;
|
|
}
|
|
Insts = NewInsts;
|
|
}
|
|
};
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Candidate solution for sinking. There may be different ways to
|
|
/// sink instructions, differing in the number of instructions sunk,
|
|
/// the number of predecessors sunk from and the number of PHIs
|
|
/// required.
|
|
struct SinkingInstructionCandidate {
|
|
unsigned NumBlocks;
|
|
unsigned NumInstructions;
|
|
unsigned NumPHIs;
|
|
unsigned NumMemoryInsts;
|
|
int Cost = -1;
|
|
SmallVector<BasicBlock *, 4> Blocks;
|
|
|
|
void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
|
|
unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
|
|
unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
|
|
Cost = (NumInstructions * (NumBlocks - 1)) -
|
|
(NumExtraPHIs *
|
|
NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
|
|
- SplitEdgeCost;
|
|
}
|
|
|
|
bool operator>(const SinkingInstructionCandidate &Other) const {
|
|
return Cost > Other.Cost;
|
|
}
|
|
};
|
|
|
|
#ifndef NDEBUG
|
|
raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
|
|
OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
|
|
<< " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
|
|
return OS;
|
|
}
|
|
#endif
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Describes a PHI node that may or may not exist. These track the PHIs
|
|
/// that must be created if we sunk a sequence of instructions. It provides
|
|
/// a hash function for efficient equality comparisons.
|
|
class ModelledPHI {
|
|
SmallVector<Value *, 4> Values;
|
|
SmallVector<BasicBlock *, 4> Blocks;
|
|
|
|
public:
|
|
ModelledPHI() = default;
|
|
|
|
ModelledPHI(const PHINode *PN) {
|
|
// BasicBlock comes first so we sort by basic block pointer order, then by value pointer order.
|
|
SmallVector<std::pair<BasicBlock *, Value *>, 4> Ops;
|
|
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
|
|
Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
|
|
llvm::sort(Ops);
|
|
for (auto &P : Ops) {
|
|
Blocks.push_back(P.first);
|
|
Values.push_back(P.second);
|
|
}
|
|
}
|
|
|
|
/// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI
|
|
/// without the same ID.
|
|
/// \note This is specifically for DenseMapInfo - do not use this!
|
|
static ModelledPHI createDummy(size_t ID) {
|
|
ModelledPHI M;
|
|
M.Values.push_back(reinterpret_cast<Value*>(ID));
|
|
return M;
|
|
}
|
|
|
|
/// Create a PHI from an array of incoming values and incoming blocks.
|
|
template <typename VArray, typename BArray>
|
|
ModelledPHI(const VArray &V, const BArray &B) {
|
|
llvm::copy(V, std::back_inserter(Values));
|
|
llvm::copy(B, std::back_inserter(Blocks));
|
|
}
|
|
|
|
/// Create a PHI from [I[OpNum] for I in Insts].
|
|
template <typename BArray>
|
|
ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
|
|
llvm::copy(B, std::back_inserter(Blocks));
|
|
for (auto *I : Insts)
|
|
Values.push_back(I->getOperand(OpNum));
|
|
}
|
|
|
|
/// Restrict the PHI's contents down to only \c NewBlocks.
|
|
/// \c NewBlocks must be a subset of \c this->Blocks.
|
|
void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
|
|
auto BI = Blocks.begin();
|
|
auto VI = Values.begin();
|
|
while (BI != Blocks.end()) {
|
|
assert(VI != Values.end());
|
|
if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) ==
|
|
NewBlocks.end()) {
|
|
BI = Blocks.erase(BI);
|
|
VI = Values.erase(VI);
|
|
} else {
|
|
++BI;
|
|
++VI;
|
|
}
|
|
}
|
|
assert(Blocks.size() == NewBlocks.size());
|
|
}
|
|
|
|
ArrayRef<Value *> getValues() const { return Values; }
|
|
|
|
bool areAllIncomingValuesSame() const {
|
|
return llvm::all_of(Values, [&](Value *V) { return V == Values[0]; });
|
|
}
|
|
|
|
bool areAllIncomingValuesSameType() const {
|
|
return llvm::all_of(
|
|
Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
|
|
}
|
|
|
|
bool areAnyIncomingValuesConstant() const {
|
|
return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
|
|
}
|
|
|
|
// Hash functor
|
|
unsigned hash() const {
|
|
return (unsigned)hash_combine_range(Values.begin(), Values.end());
|
|
}
|
|
|
|
bool operator==(const ModelledPHI &Other) const {
|
|
return Values == Other.Values && Blocks == Other.Blocks;
|
|
}
|
|
};
|
|
|
|
template <typename ModelledPHI> struct DenseMapInfo {
|
|
static inline ModelledPHI &getEmptyKey() {
|
|
static ModelledPHI Dummy = ModelledPHI::createDummy(0);
|
|
return Dummy;
|
|
}
|
|
|
|
static inline ModelledPHI &getTombstoneKey() {
|
|
static ModelledPHI Dummy = ModelledPHI::createDummy(1);
|
|
return Dummy;
|
|
}
|
|
|
|
static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
|
|
|
|
static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
|
|
return LHS == RHS;
|
|
}
|
|
};
|
|
|
|
using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// ValueTable
|
|
//===----------------------------------------------------------------------===//
|
|
// This is a value number table where the value number is a function of the
|
|
// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
|
|
// that the program would be equivalent if we replaced A with PHI(A, B).
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// A GVN expression describing how an instruction is used. The operands
|
|
/// field of BasicExpression is used to store uses, not operands.
|
|
///
|
|
/// This class also contains fields for discriminators used when determining
|
|
/// equivalence of instructions with sideeffects.
|
|
class InstructionUseExpr : public GVNExpression::BasicExpression {
|
|
unsigned MemoryUseOrder = -1;
|
|
bool Volatile = false;
|
|
|
|
public:
|
|
InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
|
|
BumpPtrAllocator &A)
|
|
: GVNExpression::BasicExpression(I->getNumUses()) {
|
|
allocateOperands(R, A);
|
|
setOpcode(I->getOpcode());
|
|
setType(I->getType());
|
|
|
|
for (auto &U : I->uses())
|
|
op_push_back(U.getUser());
|
|
llvm::sort(op_begin(), op_end());
|
|
}
|
|
|
|
void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
|
|
void setVolatile(bool V) { Volatile = V; }
|
|
|
|
hash_code getHashValue() const override {
|
|
return hash_combine(GVNExpression::BasicExpression::getHashValue(),
|
|
MemoryUseOrder, Volatile);
|
|
}
|
|
|
|
template <typename Function> hash_code getHashValue(Function MapFn) {
|
|
hash_code H =
|
|
hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile);
|
|
for (auto *V : operands())
|
|
H = hash_combine(H, MapFn(V));
|
|
return H;
|
|
}
|
|
};
|
|
|
|
class ValueTable {
|
|
DenseMap<Value *, uint32_t> ValueNumbering;
|
|
DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
|
|
DenseMap<size_t, uint32_t> HashNumbering;
|
|
BumpPtrAllocator Allocator;
|
|
ArrayRecycler<Value *> Recycler;
|
|
uint32_t nextValueNumber = 1;
|
|
|
|
/// Create an expression for I based on its opcode and its uses. If I
|
|
/// touches or reads memory, the expression is also based upon its memory
|
|
/// order - see \c getMemoryUseOrder().
|
|
InstructionUseExpr *createExpr(Instruction *I) {
|
|
InstructionUseExpr *E =
|
|
new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
|
|
if (isMemoryInst(I))
|
|
E->setMemoryUseOrder(getMemoryUseOrder(I));
|
|
|
|
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
|
|
CmpInst::Predicate Predicate = C->getPredicate();
|
|
E->setOpcode((C->getOpcode() << 8) | Predicate);
|
|
}
|
|
return E;
|
|
}
|
|
|
|
/// Helper to compute the value number for a memory instruction
|
|
/// (LoadInst/StoreInst), including checking the memory ordering and
|
|
/// volatility.
|
|
template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
|
|
if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
|
|
return nullptr;
|
|
InstructionUseExpr *E = createExpr(I);
|
|
E->setVolatile(I->isVolatile());
|
|
return E;
|
|
}
|
|
|
|
public:
|
|
ValueTable() = default;
|
|
|
|
/// Returns the value number for the specified value, assigning
|
|
/// it a new number if it did not have one before.
|
|
uint32_t lookupOrAdd(Value *V) {
|
|
auto VI = ValueNumbering.find(V);
|
|
if (VI != ValueNumbering.end())
|
|
return VI->second;
|
|
|
|
if (!isa<Instruction>(V)) {
|
|
ValueNumbering[V] = nextValueNumber;
|
|
return nextValueNumber++;
|
|
}
|
|
|
|
Instruction *I = cast<Instruction>(V);
|
|
InstructionUseExpr *exp = nullptr;
|
|
switch (I->getOpcode()) {
|
|
case Instruction::Load:
|
|
exp = createMemoryExpr(cast<LoadInst>(I));
|
|
break;
|
|
case Instruction::Store:
|
|
exp = createMemoryExpr(cast<StoreInst>(I));
|
|
break;
|
|
case Instruction::Call:
|
|
case Instruction::Invoke:
|
|
case Instruction::FNeg:
|
|
case Instruction::Add:
|
|
case Instruction::FAdd:
|
|
case Instruction::Sub:
|
|
case Instruction::FSub:
|
|
case Instruction::Mul:
|
|
case Instruction::FMul:
|
|
case Instruction::UDiv:
|
|
case Instruction::SDiv:
|
|
case Instruction::FDiv:
|
|
case Instruction::URem:
|
|
case Instruction::SRem:
|
|
case Instruction::FRem:
|
|
case Instruction::Shl:
|
|
case Instruction::LShr:
|
|
case Instruction::AShr:
|
|
case Instruction::And:
|
|
case Instruction::Or:
|
|
case Instruction::Xor:
|
|
case Instruction::ICmp:
|
|
case Instruction::FCmp:
|
|
case Instruction::Trunc:
|
|
case Instruction::ZExt:
|
|
case Instruction::SExt:
|
|
case Instruction::FPToUI:
|
|
case Instruction::FPToSI:
|
|
case Instruction::UIToFP:
|
|
case Instruction::SIToFP:
|
|
case Instruction::FPTrunc:
|
|
case Instruction::FPExt:
|
|
case Instruction::PtrToInt:
|
|
case Instruction::IntToPtr:
|
|
case Instruction::BitCast:
|
|
case Instruction::Select:
|
|
case Instruction::ExtractElement:
|
|
case Instruction::InsertElement:
|
|
case Instruction::ShuffleVector:
|
|
case Instruction::InsertValue:
|
|
case Instruction::GetElementPtr:
|
|
exp = createExpr(I);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (!exp) {
|
|
ValueNumbering[V] = nextValueNumber;
|
|
return nextValueNumber++;
|
|
}
|
|
|
|
uint32_t e = ExpressionNumbering[exp];
|
|
if (!e) {
|
|
hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
|
|
auto I = HashNumbering.find(H);
|
|
if (I != HashNumbering.end()) {
|
|
e = I->second;
|
|
} else {
|
|
e = nextValueNumber++;
|
|
HashNumbering[H] = e;
|
|
ExpressionNumbering[exp] = e;
|
|
}
|
|
}
|
|
ValueNumbering[V] = e;
|
|
return e;
|
|
}
|
|
|
|
/// Returns the value number of the specified value. Fails if the value has
|
|
/// not yet been numbered.
|
|
uint32_t lookup(Value *V) const {
|
|
auto VI = ValueNumbering.find(V);
|
|
assert(VI != ValueNumbering.end() && "Value not numbered?");
|
|
return VI->second;
|
|
}
|
|
|
|
/// Removes all value numberings and resets the value table.
|
|
void clear() {
|
|
ValueNumbering.clear();
|
|
ExpressionNumbering.clear();
|
|
HashNumbering.clear();
|
|
Recycler.clear(Allocator);
|
|
nextValueNumber = 1;
|
|
}
|
|
|
|
/// \c Inst uses or touches memory. Return an ID describing the memory state
|
|
/// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
|
|
/// the exact same memory operations happen after I1 and I2.
|
|
///
|
|
/// This is a very hard problem in general, so we use domain-specific
|
|
/// knowledge that we only ever check for equivalence between blocks sharing a
|
|
/// single immediate successor that is common, and when determining if I1 ==
|
|
/// I2 we will have already determined that next(I1) == next(I2). This
|
|
/// inductive property allows us to simply return the value number of the next
|
|
/// instruction that defines memory.
|
|
uint32_t getMemoryUseOrder(Instruction *Inst) {
|
|
auto *BB = Inst->getParent();
|
|
for (auto I = std::next(Inst->getIterator()), E = BB->end();
|
|
I != E && !I->isTerminator(); ++I) {
|
|
if (!isMemoryInst(&*I))
|
|
continue;
|
|
if (isa<LoadInst>(&*I))
|
|
continue;
|
|
CallInst *CI = dyn_cast<CallInst>(&*I);
|
|
if (CI && CI->onlyReadsMemory())
|
|
continue;
|
|
InvokeInst *II = dyn_cast<InvokeInst>(&*I);
|
|
if (II && II->onlyReadsMemory())
|
|
continue;
|
|
return lookupOrAdd(&*I);
|
|
}
|
|
return 0;
|
|
}
|
|
};
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
class GVNSink {
|
|
public:
|
|
GVNSink() = default;
|
|
|
|
bool run(Function &F) {
|
|
LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
|
|
<< "\n");
|
|
|
|
unsigned NumSunk = 0;
|
|
ReversePostOrderTraversal<Function*> RPOT(&F);
|
|
for (auto *N : RPOT)
|
|
NumSunk += sinkBB(N);
|
|
|
|
return NumSunk > 0;
|
|
}
|
|
|
|
private:
|
|
ValueTable VN;
|
|
|
|
bool isInstructionBlacklisted(Instruction *I) {
|
|
// These instructions may change or break semantics if moved.
|
|
if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
|
|
I->getType()->isTokenTy())
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
/// The main heuristic function. Analyze the set of instructions pointed to by
|
|
/// LRI and return a candidate solution if these instructions can be sunk, or
|
|
/// None otherwise.
|
|
Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
|
|
LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
|
|
ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);
|
|
|
|
/// Create a ModelledPHI for each PHI in BB, adding to PHIs.
|
|
void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
|
|
SmallPtrSetImpl<Value *> &PHIContents) {
|
|
for (PHINode &PN : BB->phis()) {
|
|
auto MPHI = ModelledPHI(&PN);
|
|
PHIs.insert(MPHI);
|
|
for (auto *V : MPHI.getValues())
|
|
PHIContents.insert(V);
|
|
}
|
|
}
|
|
|
|
/// The main instruction sinking driver. Set up state and try and sink
|
|
/// instructions into BBEnd from its predecessors.
|
|
unsigned sinkBB(BasicBlock *BBEnd);
|
|
|
|
/// Perform the actual mechanics of sinking an instruction from Blocks into
|
|
/// BBEnd, which is their only successor.
|
|
void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);
|
|
|
|
/// Remove PHIs that all have the same incoming value.
|
|
void foldPointlessPHINodes(BasicBlock *BB) {
|
|
auto I = BB->begin();
|
|
while (PHINode *PN = dyn_cast<PHINode>(I++)) {
|
|
if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
|
|
return V == PN->getIncomingValue(0);
|
|
}))
|
|
continue;
|
|
if (PN->getIncomingValue(0) != PN)
|
|
PN->replaceAllUsesWith(PN->getIncomingValue(0));
|
|
else
|
|
PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
|
|
PN->eraseFromParent();
|
|
}
|
|
}
|
|
};
|
|
|
|
Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
|
|
LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
|
|
ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
|
|
auto Insts = *LRI;
|
|
LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
|
|
: Insts) {
|
|
I->dump();
|
|
} dbgs() << " ]\n";);
|
|
|
|
DenseMap<uint32_t, unsigned> VNums;
|
|
for (auto *I : Insts) {
|
|
uint32_t N = VN.lookupOrAdd(I);
|
|
LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
|
|
if (N == ~0U)
|
|
return None;
|
|
VNums[N]++;
|
|
}
|
|
unsigned VNumToSink =
|
|
std::max_element(VNums.begin(), VNums.end(),
|
|
[](const std::pair<uint32_t, unsigned> &I,
|
|
const std::pair<uint32_t, unsigned> &J) {
|
|
return I.second < J.second;
|
|
})
|
|
->first;
|
|
|
|
if (VNums[VNumToSink] == 1)
|
|
// Can't sink anything!
|
|
return None;
|
|
|
|
// Now restrict the number of incoming blocks down to only those with
|
|
// VNumToSink.
|
|
auto &ActivePreds = LRI.getActiveBlocks();
|
|
unsigned InitialActivePredSize = ActivePreds.size();
|
|
SmallVector<Instruction *, 4> NewInsts;
|
|
for (auto *I : Insts) {
|
|
if (VN.lookup(I) != VNumToSink)
|
|
ActivePreds.remove(I->getParent());
|
|
else
|
|
NewInsts.push_back(I);
|
|
}
|
|
for (auto *I : NewInsts)
|
|
if (isInstructionBlacklisted(I))
|
|
return None;
|
|
|
|
// If we've restricted the incoming blocks, restrict all needed PHIs also
|
|
// to that set.
|
|
bool RecomputePHIContents = false;
|
|
if (ActivePreds.size() != InitialActivePredSize) {
|
|
ModelledPHISet NewNeededPHIs;
|
|
for (auto P : NeededPHIs) {
|
|
P.restrictToBlocks(ActivePreds);
|
|
NewNeededPHIs.insert(P);
|
|
}
|
|
NeededPHIs = NewNeededPHIs;
|
|
LRI.restrictToBlocks(ActivePreds);
|
|
RecomputePHIContents = true;
|
|
}
|
|
|
|
// The sunk instruction's results.
|
|
ModelledPHI NewPHI(NewInsts, ActivePreds);
|
|
|
|
// Does sinking this instruction render previous PHIs redundant?
|
|
if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) {
|
|
NeededPHIs.erase(NewPHI);
|
|
RecomputePHIContents = true;
|
|
}
|
|
|
|
if (RecomputePHIContents) {
|
|
// The needed PHIs have changed, so recompute the set of all needed
|
|
// values.
|
|
PHIContents.clear();
|
|
for (auto &PHI : NeededPHIs)
|
|
PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
|
|
}
|
|
|
|
// Is this instruction required by a later PHI that doesn't match this PHI?
|
|
// if so, we can't sink this instruction.
|
|
for (auto *V : NewPHI.getValues())
|
|
if (PHIContents.count(V))
|
|
// V exists in this PHI, but the whole PHI is different to NewPHI
|
|
// (else it would have been removed earlier). We cannot continue
|
|
// because this isn't representable.
|
|
return None;
|
|
|
|
// Which operands need PHIs?
|
|
// FIXME: If any of these fail, we should partition up the candidates to
|
|
// try and continue making progress.
|
|
Instruction *I0 = NewInsts[0];
|
|
|
|
// If all instructions that are going to participate don't have the same
|
|
// number of operands, we can't do any useful PHI analysis for all operands.
|
|
auto hasDifferentNumOperands = [&I0](Instruction *I) {
|
|
return I->getNumOperands() != I0->getNumOperands();
|
|
};
|
|
if (any_of(NewInsts, hasDifferentNumOperands))
|
|
return None;
|
|
|
|
for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
|
|
ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
|
|
if (PHI.areAllIncomingValuesSame())
|
|
continue;
|
|
if (!canReplaceOperandWithVariable(I0, OpNum))
|
|
// We can 't create a PHI from this instruction!
|
|
return None;
|
|
if (NeededPHIs.count(PHI))
|
|
continue;
|
|
if (!PHI.areAllIncomingValuesSameType())
|
|
return None;
|
|
// Don't create indirect calls! The called value is the final operand.
|
|
if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
|
|
PHI.areAnyIncomingValuesConstant())
|
|
return None;
|
|
|
|
NeededPHIs.reserve(NeededPHIs.size());
|
|
NeededPHIs.insert(PHI);
|
|
PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
|
|
}
|
|
|
|
if (isMemoryInst(NewInsts[0]))
|
|
++MemoryInstNum;
|
|
|
|
SinkingInstructionCandidate Cand;
|
|
Cand.NumInstructions = ++InstNum;
|
|
Cand.NumMemoryInsts = MemoryInstNum;
|
|
Cand.NumBlocks = ActivePreds.size();
|
|
Cand.NumPHIs = NeededPHIs.size();
|
|
for (auto *C : ActivePreds)
|
|
Cand.Blocks.push_back(C);
|
|
|
|
return Cand;
|
|
}
|
|
|
|
unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
|
|
LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
|
|
BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
|
|
SmallVector<BasicBlock *, 4> Preds;
|
|
for (auto *B : predecessors(BBEnd)) {
|
|
auto *T = B->getTerminator();
|
|
if (isa<BranchInst>(T) || isa<SwitchInst>(T))
|
|
Preds.push_back(B);
|
|
else
|
|
return 0;
|
|
}
|
|
if (Preds.size() < 2)
|
|
return 0;
|
|
llvm::sort(Preds);
|
|
|
|
unsigned NumOrigPreds = Preds.size();
|
|
// We can only sink instructions through unconditional branches.
|
|
for (auto I = Preds.begin(); I != Preds.end();) {
|
|
if ((*I)->getTerminator()->getNumSuccessors() != 1)
|
|
I = Preds.erase(I);
|
|
else
|
|
++I;
|
|
}
|
|
|
|
LockstepReverseIterator LRI(Preds);
|
|
SmallVector<SinkingInstructionCandidate, 4> Candidates;
|
|
unsigned InstNum = 0, MemoryInstNum = 0;
|
|
ModelledPHISet NeededPHIs;
|
|
SmallPtrSet<Value *, 4> PHIContents;
|
|
analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
|
|
unsigned NumOrigPHIs = NeededPHIs.size();
|
|
|
|
while (LRI.isValid()) {
|
|
auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
|
|
NeededPHIs, PHIContents);
|
|
if (!Cand)
|
|
break;
|
|
Cand->calculateCost(NumOrigPHIs, Preds.size());
|
|
Candidates.emplace_back(*Cand);
|
|
--LRI;
|
|
}
|
|
|
|
llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
|
|
LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
|
|
: Candidates) dbgs()
|
|
<< " " << C << "\n";);
|
|
|
|
// Pick the top candidate, as long it is positive!
|
|
if (Candidates.empty() || Candidates.front().Cost <= 0)
|
|
return 0;
|
|
auto C = Candidates.front();
|
|
|
|
LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
|
|
BasicBlock *InsertBB = BBEnd;
|
|
if (C.Blocks.size() < NumOrigPreds) {
|
|
LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
|
|
BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
|
|
InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
|
|
if (!InsertBB) {
|
|
LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
|
|
// Edge couldn't be split.
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
for (unsigned I = 0; I < C.NumInstructions; ++I)
|
|
sinkLastInstruction(C.Blocks, InsertBB);
|
|
|
|
return C.NumInstructions;
|
|
}
|
|
|
|
void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
|
|
BasicBlock *BBEnd) {
|
|
SmallVector<Instruction *, 4> Insts;
|
|
for (BasicBlock *BB : Blocks)
|
|
Insts.push_back(BB->getTerminator()->getPrevNode());
|
|
Instruction *I0 = Insts.front();
|
|
|
|
SmallVector<Value *, 4> NewOperands;
|
|
for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
|
|
bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
|
|
return I->getOperand(O) != I0->getOperand(O);
|
|
});
|
|
if (!NeedPHI) {
|
|
NewOperands.push_back(I0->getOperand(O));
|
|
continue;
|
|
}
|
|
|
|
// Create a new PHI in the successor block and populate it.
|
|
auto *Op = I0->getOperand(O);
|
|
assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
|
|
auto *PN = PHINode::Create(Op->getType(), Insts.size(),
|
|
Op->getName() + ".sink", &BBEnd->front());
|
|
for (auto *I : Insts)
|
|
PN->addIncoming(I->getOperand(O), I->getParent());
|
|
NewOperands.push_back(PN);
|
|
}
|
|
|
|
// Arbitrarily use I0 as the new "common" instruction; remap its operands
|
|
// and move it to the start of the successor block.
|
|
for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
|
|
I0->getOperandUse(O).set(NewOperands[O]);
|
|
I0->moveBefore(&*BBEnd->getFirstInsertionPt());
|
|
|
|
// Update metadata and IR flags.
|
|
for (auto *I : Insts)
|
|
if (I != I0) {
|
|
combineMetadataForCSE(I0, I, true);
|
|
I0->andIRFlags(I);
|
|
}
|
|
|
|
for (auto *I : Insts)
|
|
if (I != I0)
|
|
I->replaceAllUsesWith(I0);
|
|
foldPointlessPHINodes(BBEnd);
|
|
|
|
// Finally nuke all instructions apart from the common instruction.
|
|
for (auto *I : Insts)
|
|
if (I != I0)
|
|
I->eraseFromParent();
|
|
|
|
NumRemoved += Insts.size() - 1;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// Pass machinery / boilerplate
|
|
|
|
class GVNSinkLegacyPass : public FunctionPass {
|
|
public:
|
|
static char ID;
|
|
|
|
GVNSinkLegacyPass() : FunctionPass(ID) {
|
|
initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry());
|
|
}
|
|
|
|
bool runOnFunction(Function &F) override {
|
|
if (skipFunction(F))
|
|
return false;
|
|
GVNSink G;
|
|
return G.run(F);
|
|
}
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
AU.addPreserved<GlobalsAAWrapperPass>();
|
|
}
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
|
|
GVNSink G;
|
|
if (!G.run(F))
|
|
return PreservedAnalyses::all();
|
|
|
|
PreservedAnalyses PA;
|
|
PA.preserve<GlobalsAA>();
|
|
return PA;
|
|
}
|
|
|
|
char GVNSinkLegacyPass::ID = 0;
|
|
|
|
INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
|
|
"Early GVN sinking of Expressions", false, false)
|
|
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
|
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
|
|
INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
|
|
"Early GVN sinking of Expressions", false, false)
|
|
|
|
FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }
|