[MergeICmps] Make base ordering really deterministic.

Summary:
The idea is that we now manipulate bases through an `unsigned BaseId` based on
order of appearance in the comparison chain rather than through the `Value*`.

Fixes 40714.

Reviewers: gchatelet

Subscribers: mgrang, jfb, jdoerfert, llvm-commits, hans

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D58274

llvm-svn: 354131
This commit is contained in:
Clement Courbet 2019-02-15 14:17:17 +00:00
parent cc004df7eb
commit f7e84a2ccc
1 changed files with 106 additions and 86 deletions

View File

@ -41,10 +41,6 @@
//
//===----------------------------------------------------------------------===//
#include <algorithm>
#include <numeric>
#include <utility>
#include <vector>
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@ -53,6 +49,10 @@
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include <algorithm>
#include <numeric>
#include <utility>
#include <vector>
using namespace llvm;
@ -73,71 +73,87 @@ static bool isSimpleLoadOrStore(const Instruction *I) {
// that is a constant offset from a base value, e.g. `a` or `o.c` in the example
// at the top.
struct BCEAtom {
BCEAtom() : GEP(nullptr), LoadI(nullptr), Offset() {}
const Value *Base() const { return GEP ? GEP->getPointerOperand() : nullptr; }
BCEAtom() = default;
BCEAtom(GetElementPtrInst *GEP, LoadInst *LoadI, int BaseId, APInt Offset)
: GEP(GEP), LoadI(LoadI), BaseId(BaseId), Offset(Offset) {}
// We want to order BCEAtoms by (Base, Offset). However we cannot use
// the pointer values for Base because these are non-deterministic.
// To make sure that the sort order is stable, we first assign to each atom
// base value an id based on its order of appearance in the chain of
// comparisons. This id is stored in `BaseId`. For example, for:
// b[3] == c[2] && a[1] == d[1] && b[4] == c[3]
// | block 1 | | block 2 | | block 3 |
// b gets a smaller id than a, because b appears in block 1, which is
// before block 2.
// We then sort by (BaseId, Offset), which is stable.
bool operator<(const BCEAtom &O) const {
assert(Base() && "invalid atom");
assert(O.Base() && "invalid atom");
// Just ordering by (Base(), Offset) is sufficient. However because this
// means that the ordering will depend on the addresses of the base
// values, which are not reproducible from run to run. To guarantee
// stability, we use the names of the values if they exist; we sort by:
// (Base.getName(), Base(), Offset).
const int NameCmp = Base()->getName().compare(O.Base()->getName());
if (NameCmp == 0) {
if (Base() == O.Base()) {
return Offset.slt(O.Offset);
}
return Base() < O.Base();
}
return NameCmp < 0;
// Compare ids first; offsets (signed comparison) only break ties between
// atoms that share the same id.
return BaseId != O.BaseId ? BaseId < O.BaseId : Offset.slt(O.Offset);
}
GetElementPtrInst *GEP;
LoadInst *LoadI;
GetElementPtrInst *GEP = nullptr;
LoadInst *LoadI = nullptr;
// Id of the base value. 0 is the default, for an atom that was not assigned
// a base.
unsigned BaseId = 0;
// Constant offset from the base.
APInt Offset;
};
// A class that assigns increasing ids to values in the order in which they are
// seen. See comment in `BCEAtom::operator<()``.
class BaseIdentifier {
public:
// Returns the id for value `Base`, after assigning one if `Base` has not been
// seen before.
int getBaseId(const Value *Base) {
assert(Base && "invalid base");
const auto Insertion = BaseToIndex.try_emplace(Base, Order);
if (Insertion.second)
++Order;
return Insertion.first->second;
}
private:
unsigned Order = 1;
DenseMap<const Value*, int> BaseToIndex;
};
// If this value is a simple load from a dereferenceable address that is a
// constant offset w.r.t. a base address, and neither the load nor the address
// is used outside of the load's block, returns the corresponding atom (the
// base being the GEP's pointer operand). Otherwise returns a
// default-constructed atom.
BCEAtom visitICmpLoadOperand(Value *const Val) {
BCEAtom Result;
if (auto *const LoadI = dyn_cast<LoadInst>(Val)) {
LLVM_DEBUG(dbgs() << "load\n");
if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
LLVM_DEBUG(dbgs() << "used outside of block\n");
return {};
}
// Do not optimize atomic loads to non-atomic memcmp
if (!LoadI->isSimple()) {
LLVM_DEBUG(dbgs() << "volatile or atomic\n");
return {};
}
Value *const Addr = LoadI->getOperand(0);
if (auto *const GEP = dyn_cast<GetElementPtrInst>(Addr)) {
LLVM_DEBUG(dbgs() << "GEP\n");
if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
LLVM_DEBUG(dbgs() << "used outside of block\n");
return {};
}
const auto &DL = GEP->getModule()->getDataLayout();
if (!isDereferenceablePointer(GEP, DL)) {
LLVM_DEBUG(dbgs() << "not dereferenceable\n");
// We need to make sure that we can do comparison in any order, so we
// require memory to be unconditionally dereferenceable.
return {};
}
Result.Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
if (GEP->accumulateConstantOffset(DL, Result.Offset)) {
Result.GEP = GEP;
Result.LoadI = LoadI;
}
}
BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) {
auto *const LoadI = dyn_cast<LoadInst>(Val);
if (!LoadI)
return {};
LLVM_DEBUG(dbgs() << "load\n");
if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
LLVM_DEBUG(dbgs() << "used outside of block\n");
return {};
}
return Result;
// Do not optimize atomic or volatile loads to a non-atomic memcmp.
if (!LoadI->isSimple()) {
LLVM_DEBUG(dbgs() << "volatile or atomic\n");
return {};
}
Value *const Addr = LoadI->getOperand(0);
auto *const GEP = dyn_cast<GetElementPtrInst>(Addr);
if (!GEP)
return {};
LLVM_DEBUG(dbgs() << "GEP\n");
if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
LLVM_DEBUG(dbgs() << "used outside of block\n");
return {};
}
const auto &DL = GEP->getModule()->getDataLayout();
if (!isDereferenceablePointer(GEP, DL)) {
LLVM_DEBUG(dbgs() << "not dereferenceable\n");
// We need to make sure that we can do comparison in any order, so we
// require memory to be unconditionally dereferenceable.
return {};
}
// Only GEPs whose offset folds to a constant are accepted.
APInt Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
if (!GEP->accumulateConstantOffset(DL, Offset))
return {};
// The base of the atom is the GEP's pointer operand; it is given an id only
// once all the checks above have passed.
return BCEAtom(GEP, LoadI, BaseId.getBaseId(GEP->getPointerOperand()),
Offset);
}
// A basic block with a comparison between two BCE atoms, e.g. `a == o.a` in the
@ -159,9 +175,7 @@ class BCECmpBlock {
if (Rhs_ < Lhs_) std::swap(Rhs_, Lhs_);
}
bool IsValid() const {
return Lhs_.Base() != nullptr && Rhs_.Base() != nullptr;
}
bool IsValid() const { return Lhs_.BaseId != 0 && Rhs_.BaseId != 0; }
// Assert the block is consistent: If valid, it should also have
// non-null members besides Lhs_ and Rhs_.
@ -287,7 +301,8 @@ bool BCECmpBlock::doesOtherWork() const {
// Visit the given comparison. If this is a comparison between two valid
// BCE atoms, returns the comparison.
BCECmpBlock visitICmp(const ICmpInst *const CmpI,
const ICmpInst::Predicate ExpectedPredicate) {
const ICmpInst::Predicate ExpectedPredicate,
BaseIdentifier &BaseId) {
// The comparison can only be used once:
// - For intermediate blocks, as a branch condition.
// - For the final block, as an incoming value for the Phi.
@ -297,25 +312,27 @@ BCECmpBlock visitICmp(const ICmpInst *const CmpI,
LLVM_DEBUG(dbgs() << "cmp has several uses\n");
return {};
}
if (CmpI->getPredicate() == ExpectedPredicate) {
LLVM_DEBUG(dbgs() << "cmp "
<< (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne")
<< "\n");
auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0));
if (!Lhs.Base()) return {};
auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1));
if (!Rhs.Base()) return {};
const auto &DL = CmpI->getModule()->getDataLayout();
return BCECmpBlock(std::move(Lhs), std::move(Rhs),
DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()));
}
return {};
if (CmpI->getPredicate() != ExpectedPredicate)
return {};
LLVM_DEBUG(dbgs() << "cmp "
<< (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne")
<< "\n");
auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0), BaseId);
if (!Lhs.BaseId)
return {};
auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1), BaseId);
if (!Rhs.BaseId)
return {};
const auto &DL = CmpI->getModule()->getDataLayout();
return BCECmpBlock(std::move(Lhs), std::move(Rhs),
DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()));
}
// Visit the given comparison block. If this is a comparison between two valid
// BCE atoms, returns the comparison.
BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
const BasicBlock *const PhiBlock) {
const BasicBlock *const PhiBlock,
BaseIdentifier &BaseId) {
if (Block->empty()) return {};
auto *const BranchI = dyn_cast<BranchInst>(Block->getTerminator());
if (!BranchI) return {};
@ -328,7 +345,7 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
auto *const CmpI = dyn_cast<ICmpInst>(Val);
if (!CmpI) return {};
LLVM_DEBUG(dbgs() << "icmp\n");
auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ);
auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ, BaseId);
Result.CmpI = CmpI;
Result.BranchI = BranchI;
return Result;
@ -345,7 +362,8 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
assert(BranchI->getNumSuccessors() == 2 && "expecting a cond branch");
BasicBlock *const FalseBlock = BranchI->getSuccessor(1);
auto Result = visitICmp(
CmpI, FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE);
CmpI, FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
BaseId);
Result.CmpI = CmpI;
Result.BranchI = BranchI;
return Result;
@ -357,9 +375,9 @@ static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons,
BCECmpBlock &Comparison) {
LLVM_DEBUG(dbgs() << "Block '" << Comparison.BB->getName()
<< "': Found cmp of " << Comparison.SizeBits()
<< " bits between " << Comparison.Lhs().Base() << " + "
<< " bits between " << Comparison.Lhs().BaseId << " + "
<< Comparison.Lhs().Offset << " and "
<< Comparison.Rhs().Base() << " + "
<< Comparison.Rhs().BaseId << " + "
<< Comparison.Rhs().Offset << "\n");
LLVM_DEBUG(dbgs() << "\n");
Comparisons.push_back(Comparison);
@ -382,8 +400,8 @@ class BCECmpChain {
private:
// Returns true if `Second` compares, on both sides, the memory that
// immediately follows the memory compared by `First`: the LHS and RHS bases
// must match, and each offset of `Second` must equal the corresponding offset
// of `First` advanced by the size of `First` in bytes.
static bool IsContiguous(const BCECmpBlock &First,
const BCECmpBlock &Second) {
return First.Lhs().Base() == Second.Lhs().Base() &&
First.Rhs().Base() == Second.Rhs().Base() &&
return First.Lhs().BaseId == Second.Lhs().BaseId &&
First.Rhs().BaseId == Second.Rhs().BaseId &&
First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
}
@ -407,11 +425,12 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
assert(!Blocks.empty() && "a chain should have at least one block");
// Now look inside blocks to check for BCE comparisons.
std::vector<BCECmpBlock> Comparisons;
BaseIdentifier BaseId;
for (size_t BlockIdx = 0; BlockIdx < Blocks.size(); ++BlockIdx) {
BasicBlock *const Block = Blocks[BlockIdx];
assert(Block && "invalid block");
BCECmpBlock Comparison = visitCmpBlock(Phi.getIncomingValueForBlock(Block),
Block, Phi.getParent());
Block, Phi.getParent(), BaseId);
Comparison.BB = Block;
if (!Comparison.IsValid()) {
LLVM_DEBUG(dbgs() << "chain with invalid BCECmpBlock, no merge.\n");
@ -488,9 +507,10 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
#endif // MERGEICMPS_DOT_ON
// Reorder blocks by LHS. We can do that without changing the
// semantics because we are only accessing dereferenceable memory.
llvm::sort(Comparisons_, [](const BCECmpBlock &a, const BCECmpBlock &b) {
return a.Lhs() < b.Lhs();
});
llvm::sort(Comparisons_,
[](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) {
return LhsBlock.Lhs() < RhsBlock.Lhs();
});
#ifdef MERGEICMPS_DOT_ON
errs() << "AFTER REORDERING:\n\n";
dump();