Re-apply "[JumpThreading] Ignore free instructions"

It seems the crashes we saw weren't caused by this change (see comments on the review).

> This is basically D108837 but for jump threading. Free instructions
> should be ignored for the threading decision. JumpThreading already
> skips some free instructions (like pointer bitcasts), but does not
> skip various free intrinsics -- in fact, it currently gives them a
> fairly large cost of 2.
>
> Differential Revision: https://reviews.llvm.org/D110290

This reverts commit 4604695d7c.
commit 1e9afab875 (parent 321be24aa6)
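The functional core of the change shows up in the getJumpThreadDuplicationCost() hunks below: instead of special-casing a handful of instruction kinds, the pass asks TargetTransformInfo whether an instruction is free for size/latency purposes. The following is a minimal sketch of that scan, not the verbatim patch; the function approximateDuplicationCost and its scaffolding are illustrative, only the TTI query mirrors what the commit adds.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Count the instructions in BB that would actually cost something when the
// block is duplicated, stopping once Threshold is exceeded.
static unsigned approximateDuplicationCost(const TargetTransformInfo &TTI,
                                           const BasicBlock &BB,
                                           unsigned Threshold) {
  unsigned Size = 0;
  for (const Instruction &I : BB) {
    if (Size > Threshold)
      return Size;
    // Anything TTI models as free (debug intrinsics, pseudo probes, assumes,
    // llvm.experimental.noalias.scope.decl, launder.invariant.group, pointer
    // bitcasts, freeze, ...) no longer counts towards the threading cost.
    if (TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
        TargetTransformInfo::TCC_Free)
      continue;
    ++Size;
  }
  return Size;
}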
@@ -44,6 +44,7 @@ class PHINode;
 class SelectInst;
 class SwitchInst;
 class TargetLibraryInfo;
+class TargetTransformInfo;
 class Value;
 
 /// A private "module" namespace for types and utilities used by
@@ -78,6 +79,7 @@ enum ConstantPreference { WantInteger, WantBlockAddress };
 /// revectored to the false side of the second if.
 class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
   TargetLibraryInfo *TLI;
+  TargetTransformInfo *TTI;
   LazyValueInfo *LVI;
   AAResults *AA;
   DomTreeUpdater *DTU;
@@ -99,9 +101,9 @@ public:
   JumpThreadingPass(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1);
 
   // Glue for old PM.
-  bool runImpl(Function &F, TargetLibraryInfo *TLI, LazyValueInfo *LVI,
-               AAResults *AA, DomTreeUpdater *DTU, bool HasProfileData,
-               std::unique_ptr<BlockFrequencyInfo> BFI,
+  bool runImpl(Function &F, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+               LazyValueInfo *LVI, AAResults *AA, DomTreeUpdater *DTU,
+               bool HasProfileData, std::unique_ptr<BlockFrequencyInfo> BFI,
                std::unique_ptr<BranchProbabilityInfo> BPI);
 
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);

@@ -331,7 +331,7 @@ bool JumpThreading::runOnFunction(Function &F) {
     BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
   }
 
-  bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, F.hasProfileData(),
+  bool Changed = Impl.runImpl(F, TLI, TTI, LVI, AA, &DTU, F.hasProfileData(),
                               std::move(BFI), std::move(BPI));
   if (PrintLVIAfterJumpThreading) {
     dbgs() << "LVI for function '" << F.getName() << "':\n";
@@ -360,7 +360,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
     BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
   }
 
-  bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, F.hasProfileData(),
+  bool Changed = runImpl(F, &TLI, &TTI, &LVI, &AA, &DTU, F.hasProfileData(),
                          std::move(BFI), std::move(BPI));
 
   if (PrintLVIAfterJumpThreading) {
@@ -377,12 +377,14 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
 }
 
 bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
-                                LazyValueInfo *LVI_, AliasAnalysis *AA_,
-                                DomTreeUpdater *DTU_, bool HasProfileData_,
+                                TargetTransformInfo *TTI_, LazyValueInfo *LVI_,
+                                AliasAnalysis *AA_, DomTreeUpdater *DTU_,
+                                bool HasProfileData_,
                                 std::unique_ptr<BlockFrequencyInfo> BFI_,
                                 std::unique_ptr<BranchProbabilityInfo> BPI_) {
   LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
   TLI = TLI_;
+  TTI = TTI_;
   LVI = LVI_;
   AA = AA_;
   DTU = DTU_;
@@ -514,7 +516,8 @@ static void replaceFoldableUses(Instruction *Cond, Value *ToVal) {
 /// Return the cost of duplicating a piece of this block from first non-phi
 /// and before StopAt instruction to thread across it. Stop scanning the block
 /// when exceeding the threshold. If duplication is impossible, returns ~0U.
-static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
+static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI,
+                                             BasicBlock *BB,
                                              Instruction *StopAt,
                                              unsigned Threshold) {
   assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
@@ -550,26 +553,21 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
     if (Size > Threshold)
       return Size;
 
-    // Debugger intrinsics don't incur code size.
-    if (isa<DbgInfoIntrinsic>(I)) continue;
-
-    // Pseudo-probes don't incur code size.
-    if (isa<PseudoProbeInst>(I))
-      continue;
-
-    // If this is a pointer->pointer bitcast, it is free.
-    if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
-      continue;
-
-    // Freeze instruction is free, too.
-    if (isa<FreezeInst>(I))
-      continue;
-
     // Bail out if this instruction gives back a token type, it is not possible
     // to duplicate it if it is used outside this BB.
     if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
       return ~0U;
 
+    // Blocks with NoDuplicate are modelled as having infinite cost, so they
+    // are never duplicated.
+    if (const CallInst *CI = dyn_cast<CallInst>(I))
+      if (CI->cannotDuplicate() || CI->isConvergent())
+        return ~0U;
+
+    if (TTI->getUserCost(&*I, TargetTransformInfo::TCK_SizeAndLatency)
+            == TargetTransformInfo::TCC_Free)
+      continue;
+
     // All other instructions count for at least one unit.
     ++Size;
@@ -578,11 +576,7 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
     // as having cost of 2 total, and if they are a vector intrinsic, we model
     // them as having cost 1.
     if (const CallInst *CI = dyn_cast<CallInst>(I)) {
-      if (CI->cannotDuplicate() || CI->isConvergent())
-        // Blocks with NoDuplicate are modelled as having infinite cost, so they
-        // are never duplicated.
-        return ~0U;
-      else if (!isa<IntrinsicInst>(CI))
+      if (!isa<IntrinsicInst>(CI))
         Size += 3;
       else if (!CI->getType()->isVectorTy())
         Size += 1;
@@ -2234,10 +2228,10 @@ bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB,
   }
 
   // Compute the cost of duplicating BB and PredBB.
-  unsigned BBCost =
-      getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
+  unsigned BBCost = getJumpThreadDuplicationCost(
+      TTI, BB, BB->getTerminator(), BBDupThreshold);
   unsigned PredBBCost = getJumpThreadDuplicationCost(
-      PredBB, PredBB->getTerminator(), BBDupThreshold);
+      TTI, PredBB, PredBB->getTerminator(), BBDupThreshold);
 
   // Give up if costs are too high. We need to check BBCost and PredBBCost
   // individually before checking their sum because getJumpThreadDuplicationCost
@@ -2345,8 +2339,8 @@ bool JumpThreadingPass::tryThreadEdge(
     return false;
   }
 
-  unsigned JumpThreadCost =
-      getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
+  unsigned JumpThreadCost = getJumpThreadDuplicationCost(
+      TTI, BB, BB->getTerminator(), BBDupThreshold);
   if (JumpThreadCost > BBDupThreshold) {
     LLVM_DEBUG(dbgs() << "  Not threading BB '" << BB->getName()
                       << "' - Cost is too high: " << JumpThreadCost << "\n");
@@ -2614,8 +2608,8 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
     return false;
   }
 
-  unsigned DuplicationCost =
-      getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
+  unsigned DuplicationCost = getJumpThreadDuplicationCost(
+      TTI, BB, BB->getTerminator(), BBDupThreshold);
   if (DuplicationCost > BBDupThreshold) {
     LLVM_DEBUG(dbgs() << "  Not duplicating BB '" << BB->getName()
                       << "' - Cost is too high: " << DuplicationCost << "\n");
@@ -3031,7 +3025,8 @@ bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,
 
   ValueToValueMapTy UnguardedMapping, GuardedMapping;
   Instruction *AfterGuard = Guard->getNextNode();
-  unsigned Cost = getJumpThreadDuplicationCost(BB, AfterGuard, BBDupThreshold);
+  unsigned Cost =
+      getJumpThreadDuplicationCost(TTI, BB, AfterGuard, BBDupThreshold);
   if (Cost > BBDupThreshold)
     return false;
   // Duplicate all instructions before the guard and the guard itself to the
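The hunks above pass a TargetTransformInfo pointer through to getJumpThreadDuplicationCost(), but the excerpt does not show where the &TTI handed to runImpl() comes from. As a sketch only (the pass name below is hypothetical and not part of this commit), a new-pass-manager function pass would obtain it from TargetIRAnalysis:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

namespace {
// Hypothetical pass, used only to illustrate the analysis lookup.
struct TTIUserPass : llvm::PassInfoMixin<TTIUserPass> {
  llvm::PreservedAnalyses run(llvm::Function &F,
                              llvm::FunctionAnalysisManager &AM) {
    // TargetIRAnalysis produces the TargetTransformInfo whose address a
    // caller would forward to the updated runImpl() signature.
    llvm::TargetTransformInfo &TTI = AM.getResult<llvm::TargetIRAnalysis>(F);
    (void)TTI;
    return llvm::PreservedAnalyses::all();
  }
};
} // namespace

The test updates below show the behavioral effect: blocks consisting mostly of free intrinsics are now cheap enough to thread.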
@@ -5,26 +5,28 @@
 ; the jump threading threshold, as everything else are free instructions.
 define i32 @free_instructions(i1 %c, i32* %p) {
 ; CHECK-LABEL: @free_instructions(
-; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
-; CHECK:       if:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF2:%.*]], label [[ELSE2:%.*]]
+; CHECK:       if2:
 ; CHECK-NEXT:    store i32 -1, i32* [[P:%.*]], align 4
-; CHECK-NEXT:    br label [[JOIN:%.*]]
-; CHECK:       else:
-; CHECK-NEXT:    store i32 -2, i32* [[P]], align 4
-; CHECK-NEXT:    br label [[JOIN]]
-; CHECK:       join:
 ; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]])
 ; CHECK-NEXT:    store i32 1, i32* [[P]], align 4, !noalias !0
 ; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(i32* [[P]], i64 32) ]
 ; CHECK-NEXT:    store i32 2, i32* [[P]], align 4
+; CHECK-NEXT:    [[P21:%.*]] = bitcast i32* [[P]] to i8*
+; CHECK-NEXT:    [[P32:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* [[P21]])
+; CHECK-NEXT:    [[P43:%.*]] = bitcast i8* [[P32]] to i32*
+; CHECK-NEXT:    store i32 3, i32* [[P43]], align 4, !invariant.group !3
+; CHECK-NEXT:    ret i32 0
+; CHECK:       else2:
+; CHECK-NEXT:    store i32 -2, i32* [[P]], align 4
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META4:![0-9]+]])
+; CHECK-NEXT:    store i32 1, i32* [[P]], align 4, !noalias !4
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(i32* [[P]], i64 32) ]
+; CHECK-NEXT:    store i32 2, i32* [[P]], align 4
 ; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P]] to i8*
 ; CHECK-NEXT:    [[P3:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* [[P2]])
 ; CHECK-NEXT:    [[P4:%.*]] = bitcast i8* [[P3]] to i32*
 ; CHECK-NEXT:    store i32 3, i32* [[P4]], align 4, !invariant.group !3
-; CHECK-NEXT:    br i1 [[C]], label [[IF2:%.*]], label [[ELSE2:%.*]]
-; CHECK:       if2:
-; CHECK-NEXT:    ret i32 0
-; CHECK:       else2:
 ; CHECK-NEXT:    ret i32 1
 ;
   br i1 %c, label %if, label %else

@@ -32,13 +32,10 @@ define void @caller1(i1 %c, i64* align 1 %ptr) {
 ; ASSUMPTIONS-OFF-NEXT:    br label [[COMMON_RET]]
 ;
 ; ASSUMPTIONS-ON-LABEL: @caller1(
-; ASSUMPTIONS-ON-NEXT:    br i1 [[C:%.*]], label [[COMMON_RET:%.*]], label [[FALSE1:%.*]]
-; ASSUMPTIONS-ON:       false1:
-; ASSUMPTIONS-ON-NEXT:    store volatile i64 1, i64* [[PTR:%.*]], align 4
-; ASSUMPTIONS-ON-NEXT:    br label [[COMMON_RET]]
+; ASSUMPTIONS-ON-NEXT:    br i1 [[C:%.*]], label [[COMMON_RET:%.*]], label [[FALSE2:%.*]]
 ; ASSUMPTIONS-ON:       common.ret:
-; ASSUMPTIONS-ON-NEXT:    [[DOTSINK:%.*]] = phi i64 [ 3, [[FALSE1]] ], [ 2, [[TMP0:%.*]] ]
-; ASSUMPTIONS-ON-NEXT:    call void @llvm.assume(i1 true) [ "align"(i64* [[PTR]], i64 8) ]
+; ASSUMPTIONS-ON-NEXT:    [[DOTSINK:%.*]] = phi i64 [ 3, [[FALSE2]] ], [ 2, [[TMP0:%.*]] ]
+; ASSUMPTIONS-ON-NEXT:    call void @llvm.assume(i1 true) [ "align"(i64* [[PTR:%.*]], i64 8) ]
 ; ASSUMPTIONS-ON-NEXT:    store volatile i64 0, i64* [[PTR]], align 8
 ; ASSUMPTIONS-ON-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
 ; ASSUMPTIONS-ON-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
@@ -47,6 +44,9 @@ define void @caller1(i1 %c, i64* align 1 %ptr) {
 ; ASSUMPTIONS-ON-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
 ; ASSUMPTIONS-ON-NEXT:    store volatile i64 [[DOTSINK]], i64* [[PTR]], align 8
 ; ASSUMPTIONS-ON-NEXT:    ret void
+; ASSUMPTIONS-ON:       false2:
+; ASSUMPTIONS-ON-NEXT:    store volatile i64 1, i64* [[PTR]], align 4
+; ASSUMPTIONS-ON-NEXT:    br label [[COMMON_RET]]
 ;
   br i1 %c, label %true1, label %false1