forked from OSchip/llvm-project
[MergeICmps] Make base ordering really deterministic.
Summary: The idea is that we now manipulate bases through an `unsigned BaseId` based on order of appearance in the comparison chain rather than through the `Value*`. Fixes 40714. Reviewers: gchatelet Subscribers: mgrang, jfb, jdoerfert, llvm-commits, hans Tags: #llvm Differential Revision: https://reviews.llvm.org/D58274 llvm-svn: 354131
This commit is contained in:
parent
cc004df7eb
commit
f7e84a2ccc
|
@ -41,10 +41,6 @@
|
||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <numeric>
|
|
||||||
#include <utility>
|
|
||||||
#include <vector>
|
|
||||||
#include "llvm/Analysis/Loads.h"
|
#include "llvm/Analysis/Loads.h"
|
||||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||||
|
@ -53,6 +49,10 @@
|
||||||
#include "llvm/Pass.h"
|
#include "llvm/Pass.h"
|
||||||
#include "llvm/Transforms/Scalar.h"
|
#include "llvm/Transforms/Scalar.h"
|
||||||
#include "llvm/Transforms/Utils/BuildLibCalls.h"
|
#include "llvm/Transforms/Utils/BuildLibCalls.h"
|
||||||
|
#include <algorithm>
|
||||||
|
#include <numeric>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
|
@ -73,71 +73,87 @@ static bool isSimpleLoadOrStore(const Instruction *I) {
|
||||||
// that is a constant offset from a base value, e.g. `a` or `o.c` in the example
|
// that is a constant offset from a base value, e.g. `a` or `o.c` in the example
|
||||||
// at the top.
|
// at the top.
|
||||||
struct BCEAtom {
|
struct BCEAtom {
|
||||||
BCEAtom() : GEP(nullptr), LoadI(nullptr), Offset() {}
|
BCEAtom() = default;
|
||||||
|
BCEAtom(GetElementPtrInst *GEP, LoadInst *LoadI, int BaseId, APInt Offset)
|
||||||
const Value *Base() const { return GEP ? GEP->getPointerOperand() : nullptr; }
|
: GEP(GEP), LoadI(LoadI), BaseId(BaseId), Offset(Offset) {}
|
||||||
|
|
||||||
|
// We want to order BCEAtoms by (Base, Offset). However, we cannot use
|
||||||
|
// the pointer values for Base because these are non-deterministic.
|
||||||
|
// To make sure that the sort order is stable, we first assign to each atom
|
||||||
|
// base value an index based on its order of appearance in the chain of
|
||||||
|
// comparisons. We call this index `BaseId`. For example, for:
|
||||||
|
// b[3] == c[2] && a[1] == d[1] && b[4] == c[3]
|
||||||
|
// | block 1 |     | block 2 |     | block 3 |
|
||||||
|
// b gets a lower index than a, because b appears as LHS in block 1,
|
||||||
|
// which is before block 2.
|
||||||
|
// We then sort by (LHS.BaseId, LHS.Offset), which is stable.
|
||||||
bool operator<(const BCEAtom &O) const {
|
bool operator<(const BCEAtom &O) const {
|
||||||
assert(Base() && "invalid atom");
|
return BaseId != O.BaseId ? BaseId < O.BaseId : Offset.slt(O.Offset);
|
||||||
assert(O.Base() && "invalid atom");
|
|
||||||
// Just ordering by (Base(), Offset) is sufficient. However, this
|
|
||||||
// means that the ordering will depend on the addresses of the base
|
|
||||||
// values, which are not reproducible from run to run. To guarantee
|
|
||||||
// stability, we use the names of the values if they exist; we sort by:
|
|
||||||
// (Base.getName(), Base(), Offset).
|
|
||||||
const int NameCmp = Base()->getName().compare(O.Base()->getName());
|
|
||||||
if (NameCmp == 0) {
|
|
||||||
if (Base() == O.Base()) {
|
|
||||||
return Offset.slt(O.Offset);
|
|
||||||
}
|
|
||||||
return Base() < O.Base();
|
|
||||||
}
|
|
||||||
return NameCmp < 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
GetElementPtrInst *GEP;
|
GetElementPtrInst *GEP = nullptr;
|
||||||
LoadInst *LoadI;
|
LoadInst *LoadI = nullptr;
|
||||||
|
unsigned BaseId = 0;
|
||||||
APInt Offset;
|
APInt Offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// A class that assigns increasing ids to values in the order in which they are
|
||||||
|
// seen. See comment in `BCEAtom::operator<()`.
|
||||||
|
class BaseIdentifier {
|
||||||
|
public:
|
||||||
|
// Returns the id for value `Base`, after assigning one if `Base` has not been
|
||||||
|
// seen before.
|
||||||
|
int getBaseId(const Value *Base) {
|
||||||
|
assert(Base && "invalid base");
|
||||||
|
const auto Insertion = BaseToIndex.try_emplace(Base, Order);
|
||||||
|
if (Insertion.second)
|
||||||
|
++Order;
|
||||||
|
return Insertion.first->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
unsigned Order = 1;
|
||||||
|
DenseMap<const Value*, int> BaseToIndex;
|
||||||
|
};
|
||||||
|
|
||||||
// If this value is a load from a constant offset w.r.t. a base address, and
|
// If this value is a load from a constant offset w.r.t. a base address, and
|
||||||
// there are no other users of the load or address, returns the base address and
|
// there are no other users of the load or address, returns the base address and
|
||||||
// the offset.
|
// the offset.
|
||||||
BCEAtom visitICmpLoadOperand(Value *const Val) {
|
BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) {
|
||||||
BCEAtom Result;
|
auto *const LoadI = dyn_cast<LoadInst>(Val);
|
||||||
if (auto *const LoadI = dyn_cast<LoadInst>(Val)) {
|
if (!LoadI)
|
||||||
LLVM_DEBUG(dbgs() << "load\n");
|
return {};
|
||||||
if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
|
LLVM_DEBUG(dbgs() << "load\n");
|
||||||
LLVM_DEBUG(dbgs() << "used outside of block\n");
|
if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
|
||||||
return {};
|
LLVM_DEBUG(dbgs() << "used outside of block\n");
|
||||||
}
|
return {};
|
||||||
// Do not optimize atomic loads to non-atomic memcmp
|
|
||||||
if (!LoadI->isSimple()) {
|
|
||||||
LLVM_DEBUG(dbgs() << "volatile or atomic\n");
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
Value *const Addr = LoadI->getOperand(0);
|
|
||||||
if (auto *const GEP = dyn_cast<GetElementPtrInst>(Addr)) {
|
|
||||||
LLVM_DEBUG(dbgs() << "GEP\n");
|
|
||||||
if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
|
|
||||||
LLVM_DEBUG(dbgs() << "used outside of block\n");
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
const auto &DL = GEP->getModule()->getDataLayout();
|
|
||||||
if (!isDereferenceablePointer(GEP, DL)) {
|
|
||||||
LLVM_DEBUG(dbgs() << "not dereferenceable\n");
|
|
||||||
// We need to make sure that we can do comparison in any order, so we
|
|
||||||
// require memory to be unconditionally dereferenceable.
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
Result.Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
|
|
||||||
if (GEP->accumulateConstantOffset(DL, Result.Offset)) {
|
|
||||||
Result.GEP = GEP;
|
|
||||||
Result.LoadI = LoadI;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return Result;
|
// Do not optimize atomic loads to non-atomic memcmp
|
||||||
|
if (!LoadI->isSimple()) {
|
||||||
|
LLVM_DEBUG(dbgs() << "volatile or atomic\n");
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
Value *const Addr = LoadI->getOperand(0);
|
||||||
|
auto *const GEP = dyn_cast<GetElementPtrInst>(Addr);
|
||||||
|
if (!GEP)
|
||||||
|
return {};
|
||||||
|
LLVM_DEBUG(dbgs() << "GEP\n");
|
||||||
|
if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
|
||||||
|
LLVM_DEBUG(dbgs() << "used outside of block\n");
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
const auto &DL = GEP->getModule()->getDataLayout();
|
||||||
|
if (!isDereferenceablePointer(GEP, DL)) {
|
||||||
|
LLVM_DEBUG(dbgs() << "not dereferenceable\n");
|
||||||
|
// We need to make sure that we can do comparison in any order, so we
|
||||||
|
// require memory to be unconditionally dereferenceable.
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
APInt Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
|
||||||
|
if (!GEP->accumulateConstantOffset(DL, Offset))
|
||||||
|
return {};
|
||||||
|
return BCEAtom(GEP, LoadI, BaseId.getBaseId(GEP->getPointerOperand()),
|
||||||
|
Offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
// A basic block with a comparison between two BCE atoms, e.g. `a == o.a` in the
|
// A basic block with a comparison between two BCE atoms, e.g. `a == o.a` in the
|
||||||
|
@ -159,9 +175,7 @@ class BCECmpBlock {
|
||||||
if (Rhs_ < Lhs_) std::swap(Rhs_, Lhs_);
|
if (Rhs_ < Lhs_) std::swap(Rhs_, Lhs_);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsValid() const {
|
bool IsValid() const { return Lhs_.BaseId != 0 && Rhs_.BaseId != 0; }
|
||||||
return Lhs_.Base() != nullptr && Rhs_.Base() != nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Assert the block is consistent: If valid, it should also have
|
// Assert the block is consistent: If valid, it should also have
|
||||||
// non-null members besides Lhs_ and Rhs_.
|
// non-null members besides Lhs_ and Rhs_.
|
||||||
|
@ -287,7 +301,8 @@ bool BCECmpBlock::doesOtherWork() const {
|
||||||
// Visit the given comparison. If this is a comparison between two valid
|
// Visit the given comparison. If this is a comparison between two valid
|
||||||
// BCE atoms, returns the comparison.
|
// BCE atoms, returns the comparison.
|
||||||
BCECmpBlock visitICmp(const ICmpInst *const CmpI,
|
BCECmpBlock visitICmp(const ICmpInst *const CmpI,
|
||||||
const ICmpInst::Predicate ExpectedPredicate) {
|
const ICmpInst::Predicate ExpectedPredicate,
|
||||||
|
BaseIdentifier &BaseId) {
|
||||||
// The comparison can only be used once:
|
// The comparison can only be used once:
|
||||||
// - For intermediate blocks, as a branch condition.
|
// - For intermediate blocks, as a branch condition.
|
||||||
// - For the final block, as an incoming value for the Phi.
|
// - For the final block, as an incoming value for the Phi.
|
||||||
|
@ -297,25 +312,27 @@ BCECmpBlock visitICmp(const ICmpInst *const CmpI,
|
||||||
LLVM_DEBUG(dbgs() << "cmp has several uses\n");
|
LLVM_DEBUG(dbgs() << "cmp has several uses\n");
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
if (CmpI->getPredicate() == ExpectedPredicate) {
|
if (CmpI->getPredicate() != ExpectedPredicate)
|
||||||
LLVM_DEBUG(dbgs() << "cmp "
|
return {};
|
||||||
<< (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne")
|
LLVM_DEBUG(dbgs() << "cmp "
|
||||||
<< "\n");
|
<< (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne")
|
||||||
auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0));
|
<< "\n");
|
||||||
if (!Lhs.Base()) return {};
|
auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0), BaseId);
|
||||||
auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1));
|
if (!Lhs.BaseId)
|
||||||
if (!Rhs.Base()) return {};
|
return {};
|
||||||
const auto &DL = CmpI->getModule()->getDataLayout();
|
auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1), BaseId);
|
||||||
return BCECmpBlock(std::move(Lhs), std::move(Rhs),
|
if (!Rhs.BaseId)
|
||||||
DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()));
|
return {};
|
||||||
}
|
const auto &DL = CmpI->getModule()->getDataLayout();
|
||||||
return {};
|
return BCECmpBlock(std::move(Lhs), std::move(Rhs),
|
||||||
|
DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Visit the given comparison block. If this is a comparison between two valid
|
// Visit the given comparison block. If this is a comparison between two valid
|
||||||
// BCE atoms, returns the comparison.
|
// BCE atoms, returns the comparison.
|
||||||
BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
|
BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
|
||||||
const BasicBlock *const PhiBlock) {
|
const BasicBlock *const PhiBlock,
|
||||||
|
BaseIdentifier &BaseId) {
|
||||||
if (Block->empty()) return {};
|
if (Block->empty()) return {};
|
||||||
auto *const BranchI = dyn_cast<BranchInst>(Block->getTerminator());
|
auto *const BranchI = dyn_cast<BranchInst>(Block->getTerminator());
|
||||||
if (!BranchI) return {};
|
if (!BranchI) return {};
|
||||||
|
@ -328,7 +345,7 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
|
||||||
auto *const CmpI = dyn_cast<ICmpInst>(Val);
|
auto *const CmpI = dyn_cast<ICmpInst>(Val);
|
||||||
if (!CmpI) return {};
|
if (!CmpI) return {};
|
||||||
LLVM_DEBUG(dbgs() << "icmp\n");
|
LLVM_DEBUG(dbgs() << "icmp\n");
|
||||||
auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ);
|
auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ, BaseId);
|
||||||
Result.CmpI = CmpI;
|
Result.CmpI = CmpI;
|
||||||
Result.BranchI = BranchI;
|
Result.BranchI = BranchI;
|
||||||
return Result;
|
return Result;
|
||||||
|
@ -345,7 +362,8 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
|
||||||
assert(BranchI->getNumSuccessors() == 2 && "expecting a cond branch");
|
assert(BranchI->getNumSuccessors() == 2 && "expecting a cond branch");
|
||||||
BasicBlock *const FalseBlock = BranchI->getSuccessor(1);
|
BasicBlock *const FalseBlock = BranchI->getSuccessor(1);
|
||||||
auto Result = visitICmp(
|
auto Result = visitICmp(
|
||||||
CmpI, FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE);
|
CmpI, FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
|
||||||
|
BaseId);
|
||||||
Result.CmpI = CmpI;
|
Result.CmpI = CmpI;
|
||||||
Result.BranchI = BranchI;
|
Result.BranchI = BranchI;
|
||||||
return Result;
|
return Result;
|
||||||
|
@ -357,9 +375,9 @@ static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons,
|
||||||
BCECmpBlock &Comparison) {
|
BCECmpBlock &Comparison) {
|
||||||
LLVM_DEBUG(dbgs() << "Block '" << Comparison.BB->getName()
|
LLVM_DEBUG(dbgs() << "Block '" << Comparison.BB->getName()
|
||||||
<< "': Found cmp of " << Comparison.SizeBits()
|
<< "': Found cmp of " << Comparison.SizeBits()
|
||||||
<< " bits between " << Comparison.Lhs().Base() << " + "
|
<< " bits between " << Comparison.Lhs().BaseId << " + "
|
||||||
<< Comparison.Lhs().Offset << " and "
|
<< Comparison.Lhs().Offset << " and "
|
||||||
<< Comparison.Rhs().Base() << " + "
|
<< Comparison.Rhs().BaseId << " + "
|
||||||
<< Comparison.Rhs().Offset << "\n");
|
<< Comparison.Rhs().Offset << "\n");
|
||||||
LLVM_DEBUG(dbgs() << "\n");
|
LLVM_DEBUG(dbgs() << "\n");
|
||||||
Comparisons.push_back(Comparison);
|
Comparisons.push_back(Comparison);
|
||||||
|
@ -382,8 +400,8 @@ class BCECmpChain {
|
||||||
private:
|
private:
|
||||||
static bool IsContiguous(const BCECmpBlock &First,
|
static bool IsContiguous(const BCECmpBlock &First,
|
||||||
const BCECmpBlock &Second) {
|
const BCECmpBlock &Second) {
|
||||||
return First.Lhs().Base() == Second.Lhs().Base() &&
|
return First.Lhs().BaseId == Second.Lhs().BaseId &&
|
||||||
First.Rhs().Base() == Second.Rhs().Base() &&
|
First.Rhs().BaseId == Second.Rhs().BaseId &&
|
||||||
First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
|
First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
|
||||||
First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
|
First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
|
||||||
}
|
}
|
||||||
|
@ -407,11 +425,12 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
|
||||||
assert(!Blocks.empty() && "a chain should have at least one block");
|
assert(!Blocks.empty() && "a chain should have at least one block");
|
||||||
// Now look inside blocks to check for BCE comparisons.
|
// Now look inside blocks to check for BCE comparisons.
|
||||||
std::vector<BCECmpBlock> Comparisons;
|
std::vector<BCECmpBlock> Comparisons;
|
||||||
|
BaseIdentifier BaseId;
|
||||||
for (size_t BlockIdx = 0; BlockIdx < Blocks.size(); ++BlockIdx) {
|
for (size_t BlockIdx = 0; BlockIdx < Blocks.size(); ++BlockIdx) {
|
||||||
BasicBlock *const Block = Blocks[BlockIdx];
|
BasicBlock *const Block = Blocks[BlockIdx];
|
||||||
assert(Block && "invalid block");
|
assert(Block && "invalid block");
|
||||||
BCECmpBlock Comparison = visitCmpBlock(Phi.getIncomingValueForBlock(Block),
|
BCECmpBlock Comparison = visitCmpBlock(Phi.getIncomingValueForBlock(Block),
|
||||||
Block, Phi.getParent());
|
Block, Phi.getParent(), BaseId);
|
||||||
Comparison.BB = Block;
|
Comparison.BB = Block;
|
||||||
if (!Comparison.IsValid()) {
|
if (!Comparison.IsValid()) {
|
||||||
LLVM_DEBUG(dbgs() << "chain with invalid BCECmpBlock, no merge.\n");
|
LLVM_DEBUG(dbgs() << "chain with invalid BCECmpBlock, no merge.\n");
|
||||||
|
@ -488,9 +507,10 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
|
||||||
#endif // MERGEICMPS_DOT_ON
|
#endif // MERGEICMPS_DOT_ON
|
||||||
// Reorder blocks by LHS. We can do that without changing the
|
// Reorder blocks by LHS. We can do that without changing the
|
||||||
// semantics because we are only accessing dereferenceable memory.
|
// semantics because we are only accessing dereferenceable memory.
|
||||||
llvm::sort(Comparisons_, [](const BCECmpBlock &a, const BCECmpBlock &b) {
|
llvm::sort(Comparisons_,
|
||||||
return a.Lhs() < b.Lhs();
|
[](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) {
|
||||||
});
|
return LhsBlock.Lhs() < RhsBlock.Lhs();
|
||||||
|
});
|
||||||
#ifdef MERGEICMPS_DOT_ON
|
#ifdef MERGEICMPS_DOT_ON
|
||||||
errs() << "AFTER REORDERING:\n\n";
|
errs() << "AFTER REORDERING:\n\n";
|
||||||
dump();
|
dump();
|
||||||
|
|
Loading…
Reference in New Issue