forked from OSchip/llvm-project
[MergeICmps] Simplify the code.
Instead of patching the original blocks, we now generate new blocks and delete the old blocks. This results in simpler code with a less twisted control flow (see the change in `entry-block-shuffled.ll`). This will make https://reviews.llvm.org/D60318 simpler by making it more obvious where control flow created and deleted. Reviewers: gchatelet Subscribers: hiraditya, llvm-commits, spatel Tags: #llvm Differential Revision: https://reviews.llvm.org/D61736 llvm-svn: 360771
This commit is contained in:
parent
2dd6a0c0c3
commit
157ae639fa
|
@ -48,6 +48,7 @@
|
|||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/Transforms/Utils/BuildLibCalls.h"
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
|
@ -406,13 +407,6 @@ class BCECmpChain {
|
|||
First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
|
||||
}
|
||||
|
||||
// Merges the given comparison blocks into one memcmp block and update
|
||||
// branches. Comparisons are assumed to be continguous. If NextBBInChain is
|
||||
// null, the merged block will link to the phi block.
|
||||
void mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
|
||||
BasicBlock *const NextBBInChain, PHINode &Phi,
|
||||
const TargetLibraryInfo *const TLI, AliasAnalysis *AA);
|
||||
|
||||
PHINode &Phi_;
|
||||
std::vector<BCECmpBlock> Comparisons_;
|
||||
// The original entry block (before sorting);
|
||||
|
@ -452,7 +446,7 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
|
|||
// chain before sorting. Unless we can abort the chain at this point
|
||||
// and start anew.
|
||||
//
|
||||
// NOTE: we only handle block with single predecessor for now.
|
||||
// NOTE: we only handle blocks a with single predecessor for now.
|
||||
if (Comparison.canSplit(AA)) {
|
||||
LLVM_DEBUG(dbgs()
|
||||
<< "Split initial block '" << Comparison.BB->getName()
|
||||
|
@ -540,162 +534,173 @@ void BCECmpChain::dump() const {
|
|||
}
|
||||
#endif // MERGEICMPS_DOT_ON
|
||||
|
||||
bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI,
|
||||
AliasAnalysis *AA) {
|
||||
// First pass to check if there is at least one merge. If not, we don't do
|
||||
// anything and we keep analysis passes intact.
|
||||
{
|
||||
bool AtLeastOneMerged = false;
|
||||
for (size_t I = 1; I < Comparisons_.size(); ++I) {
|
||||
if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) {
|
||||
AtLeastOneMerged = true;
|
||||
break;
|
||||
namespace {
|
||||
|
||||
// A class to compute the name of a set of merged basic blocks.
|
||||
// This is optimized for the common case of no block names.
|
||||
class MergedBlockName {
|
||||
// Storage for the uncommon case of several named blocks.
|
||||
SmallString<16> Scratch;
|
||||
|
||||
public:
|
||||
explicit MergedBlockName(ArrayRef<BCECmpBlock> Comparisons)
|
||||
: Name(makeTwine(Comparisons)) {}
|
||||
const Twine Name;
|
||||
|
||||
private:
|
||||
Twine makeTwine(ArrayRef<BCECmpBlock> Comparisons) {
|
||||
assert(!Comparisons.empty() && "no basic block");
|
||||
// Fast path: only one block, or no names at all.
|
||||
if (Comparisons.size() == 1)
|
||||
return Comparisons[0].BB->getName();
|
||||
const int size = std::accumulate(Comparisons.begin(), Comparisons.end(), 0,
|
||||
[](int i, const BCECmpBlock &Cmp) {
|
||||
return i + Cmp.BB->getName().size();
|
||||
});
|
||||
if (size == 0)
|
||||
return Twine();
|
||||
|
||||
// Slow path: at least two blocks, at least one block with a name.
|
||||
Scratch.clear();
|
||||
// We'll have `size` bytes for name and `Comparisons.size() - 1` bytes for
|
||||
// separators.
|
||||
Scratch.reserve(size + Comparisons.size() - 1);
|
||||
const auto append = [this](StringRef str) {
|
||||
Scratch.append(str.begin(), str.end());
|
||||
};
|
||||
append(Comparisons[0].BB->getName());
|
||||
for (int I = 1, E = Comparisons.size(); I < E; ++I) {
|
||||
const BasicBlock *const BB = Comparisons[I].BB;
|
||||
if (!BB->getName().empty()) {
|
||||
append("+");
|
||||
append(BB->getName());
|
||||
}
|
||||
}
|
||||
if (!AtLeastOneMerged) return false;
|
||||
return Twine(Scratch);
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// Remove phi references to comparison blocks, they will be rebuilt as we
|
||||
// merge the blocks.
|
||||
for (const auto &Comparison : Comparisons_) {
|
||||
Phi_.removeIncomingValue(Comparison.BB, false);
|
||||
}
|
||||
// Merges the given contiguous comparison blocks into one memcmp block.
|
||||
static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
|
||||
BasicBlock *const NextCmpBlock,
|
||||
PHINode &Phi,
|
||||
const TargetLibraryInfo *const TLI,
|
||||
AliasAnalysis *AA) {
|
||||
assert(!Comparisons.empty() && "merging zero comparisons");
|
||||
LLVMContext &Context = NextCmpBlock->getContext();
|
||||
const BCECmpBlock &FirstCmp = Comparisons[0];
|
||||
|
||||
// If entry block is part of the chain, we need to make the first block
|
||||
// of the chain the new entry block of the function.
|
||||
BasicBlock *Entry = &Comparisons_[0].BB->getParent()->getEntryBlock();
|
||||
for (size_t I = 1; I < Comparisons_.size(); ++I) {
|
||||
if (Entry == Comparisons_[I].BB) {
|
||||
BasicBlock *NEntryBB = BasicBlock::Create(Entry->getContext(), "",
|
||||
Entry->getParent(), Entry);
|
||||
BranchInst::Create(Entry, NEntryBB);
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Create a new cmp block before next cmp block.
|
||||
BasicBlock *const BB =
|
||||
BasicBlock::Create(Context, MergedBlockName(Comparisons).Name,
|
||||
NextCmpBlock->getParent(), NextCmpBlock);
|
||||
IRBuilder<> Builder(BB);
|
||||
// Add the GEPs from the first BCECmpBlock.
|
||||
Value *const Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone());
|
||||
Value *const Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone());
|
||||
|
||||
// Point the predecessors of the chain to the first comparison block (which is
|
||||
// the new entry point) and update the entry block of the chain.
|
||||
if (EntryBlock_ != Comparisons_[0].BB) {
|
||||
EntryBlock_->replaceAllUsesWith(Comparisons_[0].BB);
|
||||
EntryBlock_ = Comparisons_[0].BB;
|
||||
}
|
||||
Value *IsEqual = nullptr;
|
||||
if (Comparisons.size() == 1) {
|
||||
LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n");
|
||||
Value *const LhsLoad =
|
||||
Builder.CreateLoad(FirstCmp.Lhs().LoadI->getType(), Lhs);
|
||||
Value *const RhsLoad =
|
||||
Builder.CreateLoad(FirstCmp.Rhs().LoadI->getType(), Rhs);
|
||||
// There are no blocks to merge, just do the comparison.
|
||||
IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad);
|
||||
} else {
|
||||
LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n");
|
||||
|
||||
// Effectively merge blocks.
|
||||
int NumMerged = 1;
|
||||
for (size_t I = 1; I < Comparisons_.size(); ++I) {
|
||||
if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) {
|
||||
++NumMerged;
|
||||
} else {
|
||||
// Merge all previous comparisons and start a new merge block.
|
||||
mergeComparisons(
|
||||
makeArrayRef(Comparisons_).slice(I - NumMerged, NumMerged),
|
||||
Comparisons_[I].BB, Phi_, TLI, AA);
|
||||
NumMerged = 1;
|
||||
}
|
||||
}
|
||||
mergeComparisons(makeArrayRef(Comparisons_)
|
||||
.slice(Comparisons_.size() - NumMerged, NumMerged),
|
||||
nullptr, Phi_, TLI, AA);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void BCECmpChain::mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
|
||||
BasicBlock *const NextBBInChain,
|
||||
PHINode &Phi,
|
||||
const TargetLibraryInfo *const TLI,
|
||||
AliasAnalysis *AA) {
|
||||
assert(!Comparisons.empty());
|
||||
const auto &FirstComparison = *Comparisons.begin();
|
||||
BasicBlock *const BB = FirstComparison.BB;
|
||||
LLVMContext &Context = BB->getContext();
|
||||
|
||||
if (Comparisons.size() >= 2) {
|
||||
// If there is one block that requires splitting, we do it now, i.e.
|
||||
// just before we know we will collapse the chain. The instructions
|
||||
// can be executed before any of the instructions in the chain.
|
||||
auto C = std::find_if(Comparisons.begin(), Comparisons.end(),
|
||||
[](const BCECmpBlock &B) { return B.RequireSplit; });
|
||||
if (C != Comparisons.end())
|
||||
C->split(EntryBlock_, AA);
|
||||
const auto ToSplit =
|
||||
std::find_if(Comparisons.begin(), Comparisons.end(),
|
||||
[](const BCECmpBlock &B) { return B.RequireSplit; });
|
||||
if (ToSplit != Comparisons.end()) {
|
||||
LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n");
|
||||
ToSplit->split(BB, AA);
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n");
|
||||
const auto TotalSize =
|
||||
std::accumulate(Comparisons.begin(), Comparisons.end(), 0,
|
||||
[](int Size, const BCECmpBlock &C) {
|
||||
return Size + C.SizeBits();
|
||||
}) /
|
||||
8;
|
||||
const unsigned TotalSizeBits = std::accumulate(
|
||||
Comparisons.begin(), Comparisons.end(), 0u,
|
||||
[](int Size, const BCECmpBlock &C) { return Size + C.SizeBits(); });
|
||||
|
||||
// Incoming edges do not need to be updated, and both GEPs are already
|
||||
// computing the right address, we just need to:
|
||||
// - replace the two loads and the icmp with the memcmp
|
||||
// - update the branch
|
||||
// - update the incoming values in the phi.
|
||||
FirstComparison.BranchI->eraseFromParent();
|
||||
FirstComparison.CmpI->eraseFromParent();
|
||||
FirstComparison.Lhs().LoadI->eraseFromParent();
|
||||
FirstComparison.Rhs().LoadI->eraseFromParent();
|
||||
|
||||
IRBuilder<> Builder(BB);
|
||||
// Create memcmp() == 0.
|
||||
const auto &DL = Phi.getModule()->getDataLayout();
|
||||
Value *const MemCmpCall = emitMemCmp(
|
||||
FirstComparison.Lhs().GEP, FirstComparison.Rhs().GEP,
|
||||
ConstantInt::get(DL.getIntPtrType(Context), TotalSize),
|
||||
Builder, DL, TLI);
|
||||
Value *const MemCmpIsZero = Builder.CreateICmpEQ(
|
||||
Lhs, Rhs,
|
||||
ConstantInt::get(DL.getIntPtrType(Context), TotalSizeBits / 8), Builder,
|
||||
DL, TLI);
|
||||
IsEqual = Builder.CreateICmpEQ(
|
||||
MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0));
|
||||
}
|
||||
|
||||
// Add a branch to the next basic block in the chain.
|
||||
if (NextBBInChain) {
|
||||
Builder.CreateCondBr(MemCmpIsZero, NextBBInChain, Phi.getParent());
|
||||
Phi.addIncoming(ConstantInt::getFalse(Context), BB);
|
||||
} else {
|
||||
Builder.CreateBr(Phi.getParent());
|
||||
Phi.addIncoming(MemCmpIsZero, BB);
|
||||
}
|
||||
|
||||
// Delete merged blocks.
|
||||
for (size_t I = 1; I < Comparisons.size(); ++I) {
|
||||
BasicBlock *CBB = Comparisons[I].BB;
|
||||
CBB->replaceAllUsesWith(BB);
|
||||
CBB->eraseFromParent();
|
||||
}
|
||||
BasicBlock *const PhiBB = Phi.getParent();
|
||||
// Add a branch to the next basic block in the chain.
|
||||
if (NextCmpBlock == PhiBB) {
|
||||
// Continue to phi, passing it the comparison result.
|
||||
Builder.CreateBr(Phi.getParent());
|
||||
Phi.addIncoming(IsEqual, BB);
|
||||
} else {
|
||||
assert(Comparisons.size() == 1);
|
||||
// There are no blocks to merge, but we still need to update the branches.
|
||||
LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n");
|
||||
if (NextBBInChain) {
|
||||
if (FirstComparison.BranchI->isConditional()) {
|
||||
LLVM_DEBUG(dbgs() << "conditional -> conditional\n");
|
||||
// Just update the "true" target, the "false" target should already be
|
||||
// the phi block.
|
||||
assert(FirstComparison.BranchI->getSuccessor(1) == Phi.getParent());
|
||||
FirstComparison.BranchI->setSuccessor(0, NextBBInChain);
|
||||
Phi.addIncoming(ConstantInt::getFalse(Context), BB);
|
||||
} else {
|
||||
LLVM_DEBUG(dbgs() << "unconditional -> conditional\n");
|
||||
// Replace the unconditional branch by a conditional one.
|
||||
FirstComparison.BranchI->eraseFromParent();
|
||||
IRBuilder<> Builder(BB);
|
||||
Builder.CreateCondBr(FirstComparison.CmpI, NextBBInChain,
|
||||
Phi.getParent());
|
||||
Phi.addIncoming(FirstComparison.CmpI, BB);
|
||||
}
|
||||
// Continue to next block if equal, exit to phi else.
|
||||
Builder.CreateCondBr(IsEqual, NextCmpBlock, PhiBB);
|
||||
Phi.addIncoming(ConstantInt::getFalse(Context), BB);
|
||||
}
|
||||
return BB;
|
||||
}
|
||||
|
||||
bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI,
|
||||
AliasAnalysis *AA) {
|
||||
assert(Comparisons_.size() >= 2 && "simplifying trivial BCECmpChain");
|
||||
// First pass to check if there is at least one merge. If not, we don't do
|
||||
// anything and we keep analysis passes intact.
|
||||
const auto AtLeastOneMerged = [this]() {
|
||||
for (size_t I = 1; I < Comparisons_.size(); ++I) {
|
||||
if (IsContiguous(Comparisons_[I - 1], Comparisons_[I]))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
if (!AtLeastOneMerged())
|
||||
return false;
|
||||
|
||||
// Effectively merge blocks. We go in the reverse direction from the phi block
|
||||
// so that the next block is always available to branch to.
|
||||
const auto mergeRange = [this, TLI, AA](int I, int Num, BasicBlock *Next) {
|
||||
return mergeComparisons(makeArrayRef(Comparisons_).slice(I, Num), Next,
|
||||
Phi_, TLI, AA);
|
||||
};
|
||||
int NumMerged = 1;
|
||||
BasicBlock *NextCmpBlock = Phi_.getParent();
|
||||
for (int I = static_cast<int>(Comparisons_.size()) - 2; I >= 0; --I) {
|
||||
if (IsContiguous(Comparisons_[I], Comparisons_[I + 1])) {
|
||||
++NumMerged;
|
||||
} else {
|
||||
if (FirstComparison.BranchI->isConditional()) {
|
||||
LLVM_DEBUG(dbgs() << "conditional -> unconditional\n");
|
||||
// Replace the conditional branch by an unconditional one.
|
||||
FirstComparison.BranchI->eraseFromParent();
|
||||
IRBuilder<> Builder(BB);
|
||||
Builder.CreateBr(Phi.getParent());
|
||||
Phi.addIncoming(FirstComparison.CmpI, BB);
|
||||
} else {
|
||||
LLVM_DEBUG(dbgs() << "unconditional -> unconditional\n");
|
||||
Phi.addIncoming(FirstComparison.CmpI, BB);
|
||||
}
|
||||
NextCmpBlock = mergeRange(I + 1, NumMerged, NextCmpBlock);
|
||||
NumMerged = 1;
|
||||
}
|
||||
}
|
||||
NextCmpBlock = mergeRange(0, NumMerged, NextCmpBlock);
|
||||
|
||||
// Replace the original cmp chain with the new cmp chain by pointing all
|
||||
// predecessors of EntryBlock_ to NextCmpBlock instead. This makes all cmp
|
||||
// blocks in the old chain unreachable.
|
||||
for (BasicBlock *Pred : predecessors(EntryBlock_)) {
|
||||
Pred->getTerminator()->replaceUsesOfWith(EntryBlock_, NextCmpBlock);
|
||||
}
|
||||
EntryBlock_ = nullptr;
|
||||
|
||||
// Delete merged blocks. This also removes incoming values in phi.
|
||||
SmallVector<BasicBlock *, 16> DeadBlocks;
|
||||
for (auto &Cmp : Comparisons_) {
|
||||
DeadBlocks.push_back(Cmp.BB);
|
||||
}
|
||||
DeleteDeadBlocks(DeadBlocks);
|
||||
|
||||
Comparisons_.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi,
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
define zeroext i1 @opeq1(
|
||||
; PPC64LE-LABEL: opeq1:
|
||||
; PPC64LE: # %bb.0: # %entry
|
||||
; PPC64LE: # %bb.0: # %"entry+land.rhs.i"
|
||||
; PPC64LE-NEXT: ld 3, 0(3)
|
||||
; PPC64LE-NEXT: ld 4, 0(4)
|
||||
; PPC64LE-NEXT: xor 3, 3, 4
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
define zeroext i1 @opeq1(
|
||||
; X86-LABEL: opeq1:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86: # %bb.0: # %"entry+land.rhs.i"
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl (%ecx), %edx
|
||||
|
@ -20,7 +20,7 @@ define zeroext i1 @opeq1(
|
|||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: opeq1:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64: # %bb.0: # %"entry+land.rhs.i"
|
||||
; X64-NEXT: movq (%rdi), %rax
|
||||
; X64-NEXT: cmpq (%rsi), %rax
|
||||
; X64-NEXT: sete %al
|
||||
|
|
|
@ -5,19 +5,18 @@
|
|||
|
||||
define zeroext i1 @opeq1(
|
||||
; X86-LABEL: @opeq1(
|
||||
; X86-NEXT: entry:
|
||||
; X86-NEXT: "entry+land.rhs.i+land.rhs.i.2+land.rhs.i.3":
|
||||
; X86-NEXT: [[PTR:%.*]] = alloca i32
|
||||
; X86-NEXT: store i32 42, i32* [[PTR]]
|
||||
; X86-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
|
||||
; X86-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
|
||||
; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[FIRST_I]] to i8*
|
||||
; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[FIRST1_I]] to i8*
|
||||
; X86-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
|
||||
; X86-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
|
||||
; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8*
|
||||
; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP1]] to i8*
|
||||
; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 16)
|
||||
; X86-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0
|
||||
; X86-NEXT: [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
|
||||
; X86-NEXT: br label [[OPEQ1_EXIT:%.*]]
|
||||
; X86: opeq1.exit:
|
||||
; X86-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[ENTRY:%.*]] ]
|
||||
; X86-NEXT: ret i1 [[TMP1]]
|
||||
; X86-NEXT: ret i1 [[TMP2]]
|
||||
;
|
||||
%S* nocapture readonly dereferenceable(16) %a,
|
||||
%S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
|
||||
|
|
|
@ -3,37 +3,37 @@
|
|||
|
||||
%S = type { i32, i32, i32, i32 }
|
||||
|
||||
; The entry block is part of the chain. It however can not be merged. We need to make the
|
||||
; first comparison block in the chain the new entry block of the function.
|
||||
; The entry block is part of the chain. It however can not be merged. We need to
|
||||
; make sure that the control flow is still consistent (goes through each of the
|
||||
; blocks).
|
||||
|
||||
define zeroext i1 @opeq1(
|
||||
; CHECK-LABEL: @opeq1(
|
||||
; CHECK-NEXT: br label [[LAND_RHS_I:%.*]]
|
||||
; CHECK: entry:
|
||||
; CHECK-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 3
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST_I]], align 4
|
||||
; CHECK-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[FIRST1_I]], align 4
|
||||
; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I_3:%.*]], label [[OPEQ1_EXIT:%.*]]
|
||||
; CHECK: land.rhs.i:
|
||||
; CHECK-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 0
|
||||
; CHECK-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 0
|
||||
; CHECK-NEXT: [[CSTR:%.*]] = bitcast i32* [[SECOND_I]] to i8*
|
||||
; CHECK-NEXT: [[CSTR1:%.*]] = bitcast i32* [[SECOND2_I]] to i8*
|
||||
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8)
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[MEMCMP]], 0
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[ENTRY:%.*]], label [[OPEQ1_EXIT]]
|
||||
; CHECK: land.rhs.i.3:
|
||||
; CHECK-NEXT: [[FOURTH_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 3
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[FOURTH_I]], align 4
|
||||
; CHECK-NEXT: [[FOURTH2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 3
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[FOURTH2_I]], align 4
|
||||
; CHECK-NEXT: [[CMP5_I:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
|
||||
; CHECK-NEXT: "land.rhs.i+land.rhs.i.2":
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
|
||||
; CHECK-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8*
|
||||
; CHECK-NEXT: [[CSTR3:%.*]] = bitcast i32* [[TMP1]] to i8*
|
||||
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR3]], i64 8)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
|
||||
; CHECK-NEXT: br i1 [[TMP2]], label [[ENTRY2:%.*]], label [[OPEQ1_EXIT:%.*]]
|
||||
; CHECK: entry2:
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 3
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
|
||||
; CHECK-NEXT: br i1 [[TMP7]], label [[LAND_RHS_I_31:%.*]], label [[OPEQ1_EXIT]]
|
||||
; CHECK: land.rhs.i.31:
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 3
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 3
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP10]], [[TMP11]]
|
||||
; CHECK-NEXT: br label [[OPEQ1_EXIT]]
|
||||
; CHECK: opeq1.exit:
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ false, [[LAND_RHS_I]] ], [ false, [[ENTRY]] ], [ [[CMP5_I]], [[LAND_RHS_I_3]] ]
|
||||
; CHECK-NEXT: ret i1 [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = phi i1 [ [[TMP12]], [[LAND_RHS_I_31]] ], [ false, [[ENTRY2]] ], [ false, %"land.rhs.i+land.rhs.i.2" ]
|
||||
; CHECK-NEXT: ret i1 [[TMP13]]
|
||||
;
|
||||
%S* nocapture readonly dereferenceable(16) %a,
|
||||
%S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
|
||||
|
|
|
@ -23,18 +23,18 @@ define zeroext i1 @opeq1(
|
|||
; X86-NEXT: [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4
|
||||
; X86-NEXT: call void (...) @foo()
|
||||
; X86-NEXT: [[CMP2_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
|
||||
; X86-NEXT: br i1 [[CMP2_I]], label [[LAND_RHS_I_2:%.*]], label [[OPEQ1_EXIT]]
|
||||
; X86: land.rhs.i.2:
|
||||
; X86-NEXT: [[THIRD_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 2
|
||||
; X86-NEXT: [[THIRD2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 2
|
||||
; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[THIRD_I]] to i8*
|
||||
; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[THIRD2_I]] to i8*
|
||||
; X86-NEXT: br i1 [[CMP2_I]], label %"land.rhs.i.2+land.rhs.i.3", label [[OPEQ1_EXIT]]
|
||||
; X86: "land.rhs.i.2+land.rhs.i.3":
|
||||
; X86-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 2
|
||||
; X86-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 2
|
||||
; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP4]] to i8*
|
||||
; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP5]] to i8*
|
||||
; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8)
|
||||
; X86-NEXT: [[TMP4:%.*]] = icmp eq i32 [[MEMCMP]], 0
|
||||
; X86-NEXT: [[TMP6:%.*]] = icmp eq i32 [[MEMCMP]], 0
|
||||
; X86-NEXT: br label [[OPEQ1_EXIT]]
|
||||
; X86: opeq1.exit:
|
||||
; X86-NEXT: [[TMP5:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ false, [[LAND_RHS_I]] ], [ [[TMP4]], [[LAND_RHS_I_2]] ]
|
||||
; X86-NEXT: ret i1 [[TMP5]]
|
||||
; X86-NEXT: [[TMP7:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ false, [[LAND_RHS_I]] ], [ [[TMP6]], %"land.rhs.i.2+land.rhs.i.3" ]
|
||||
; X86-NEXT: ret i1 [[TMP7]]
|
||||
;
|
||||
%S* nocapture readonly dereferenceable(16) %a,
|
||||
%S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
|
||||
|
|
|
@ -6,17 +6,16 @@
|
|||
|
||||
define zeroext i1 @opeq1(
|
||||
; X86-LABEL: @opeq1(
|
||||
; X86-NEXT: entry:
|
||||
; X86-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
|
||||
; X86-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
|
||||
; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[FIRST_I]] to i8*
|
||||
; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[FIRST1_I]] to i8*
|
||||
; X86-NEXT: "entry+land.rhs.i":
|
||||
; X86-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
|
||||
; X86-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
|
||||
; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8*
|
||||
; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP1]] to i8*
|
||||
; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8)
|
||||
; X86-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0
|
||||
; X86-NEXT: [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
|
||||
; X86-NEXT: br label [[OPEQ1_EXIT:%.*]]
|
||||
; X86: opeq1.exit:
|
||||
; X86-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[ENTRY:%.*]] ]
|
||||
; X86-NEXT: ret i1 [[TMP1]]
|
||||
; X86-NEXT: ret i1 [[TMP2]]
|
||||
;
|
||||
; X86-NOBUILTIN-LABEL: @opeq1(
|
||||
; X86-NOBUILTIN-NEXT: entry:
|
||||
|
@ -67,17 +66,15 @@ opeq1.exit:
|
|||
; Same as above, but the two blocks are in inverse order.
|
||||
define zeroext i1 @opeq1_inverse(
|
||||
; X86-LABEL: @opeq1_inverse(
|
||||
; X86-NEXT: br label [[LAND_RHS_I:%.*]]
|
||||
; X86: land.rhs.i:
|
||||
; X86-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
|
||||
; X86-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
|
||||
; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[SECOND_I]] to i8*
|
||||
; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[SECOND2_I]] to i8*
|
||||
; X86-NEXT: "land.rhs.i+entry":
|
||||
; X86-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
|
||||
; X86-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
|
||||
; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8*
|
||||
; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP1]] to i8*
|
||||
; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8)
|
||||
; X86-NEXT: [[TMP1:%.*]] = icmp eq i32 [[MEMCMP]], 0
|
||||
; X86-NEXT: [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
|
||||
; X86-NEXT: br label [[OPEQ1_EXIT:%.*]]
|
||||
; X86: opeq1.exit:
|
||||
; X86-NEXT: [[TMP2:%.*]] = phi i1 [ [[TMP1]], [[LAND_RHS_I]] ]
|
||||
; X86-NEXT: ret i1 [[TMP2]]
|
||||
;
|
||||
; X86-NOBUILTIN-LABEL: @opeq1_inverse(
|
||||
|
|
|
@ -8,18 +8,17 @@ declare void @foo(...) nounwind readnone
|
|||
; We can split %entry and create a memcmp(16 bytes).
|
||||
define zeroext i1 @opeq1(
|
||||
; X86-LABEL: @opeq1(
|
||||
; X86-NEXT: entry:
|
||||
; X86-NEXT: "entry+land.rhs.i+land.rhs.i.2+land.rhs.i.3":
|
||||
; X86-NEXT: call void (...) @foo()
|
||||
; X86-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
|
||||
; X86-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
|
||||
; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[FIRST_I]] to i8*
|
||||
; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[FIRST1_I]] to i8*
|
||||
; X86-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0
|
||||
; X86-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0
|
||||
; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8*
|
||||
; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP1]] to i8*
|
||||
; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 16)
|
||||
; X86-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0
|
||||
; X86-NEXT: [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
|
||||
; X86-NEXT: br label [[OPEQ1_EXIT:%.*]]
|
||||
; X86: opeq1.exit:
|
||||
; X86-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[ENTRY:%.*]] ]
|
||||
; X86-NEXT: ret i1 [[TMP1]]
|
||||
; X86-NEXT: ret i1 [[TMP2]]
|
||||
;
|
||||
; Make sure this call is moved to the beginning of the entry block.
|
||||
%S* nocapture readonly dereferenceable(16) %a,
|
||||
|
|
Loading…
Reference in New Issue