Do not select EhPad BB in MachineBlockPlacement when there is regular BB to schedule

Summary:
EHPad BB are not entered the classic way and therefor do not need to be placed after their predecessors. This patch make sure EHPad BB are not chosen amongst successors to form chains, and are selected as last resort when selecting the best candidate.

EHPad are scheduled in reverse probability order in order to have them flow into each others naturally.

Reviewers: chandlerc, majnemer, rafael, MatzeB, escha, silvas

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D17625

llvm-svn: 265726
This commit is contained in:
Amaury Sechet 2016-04-07 21:29:39 +00:00
parent 846219ae10
commit c53ad4f3b2
6 changed files with 171 additions and 76 deletions

View File

@ -262,6 +262,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
SmallVectorImpl<MachineBasicBlock *> &EHPadWorkList,
const BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
BlockChain &Chain,
@ -282,9 +283,11 @@ class MachineBlockPlacement : public MachineFunctionPass {
void fillWorkLists(MachineBasicBlock *MBB,
SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
SmallVectorImpl<MachineBasicBlock *> &EHPadWorkList,
const BlockFilterSet *BlockFilter);
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
SmallVectorImpl<MachineBasicBlock *> &EHPadWorkList,
const BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *findBestLoopTop(MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
@ -350,6 +353,7 @@ static std::string getBlockName(MachineBasicBlock *BB) {
void MachineBlockPlacement::markChainSuccessors(
BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
SmallVectorImpl<MachineBasicBlock *> &EHPadWorkList,
const BlockFilterSet *BlockFilter) {
// Walk all the blocks in this chain, marking their successors as having
// a predecessor placed.
@ -368,8 +372,15 @@ void MachineBlockPlacement::markChainSuccessors(
// This is a cross-chain edge that is within the loop, so decrement the
// loop predecessor count of the destination chain.
if (SuccChain.UnscheduledPredecessors > 0 && --SuccChain.UnscheduledPredecessors == 0)
BlockWorkList.push_back(*SuccChain.begin());
if (SuccChain.UnscheduledPredecessors == 0 ||
--SuccChain.UnscheduledPredecessors > 0)
continue;
auto *MBB = *SuccChain.begin();
if (MBB->isEHPad())
EHPadWorkList.push_back(MBB);
else
BlockWorkList.push_back(MBB);
}
}
}
@ -412,7 +423,7 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
SmallVector<MachineBasicBlock *, 4> Successors;
for (MachineBasicBlock *Succ : BB->successors()) {
bool SkipSucc = false;
if (BlockFilter && !BlockFilter->count(Succ)) {
if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
SkipSucc = true;
} else {
BlockChain *SuccChain = BlockToChain[Succ];
@ -532,9 +543,16 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
}),
WorkList.end());
if (WorkList.empty())
return nullptr;
bool IsEHPad = WorkList[0]->isEHPad();
MachineBasicBlock *BestBlock = nullptr;
BlockFrequency BestFreq;
for (MachineBasicBlock *MBB : WorkList) {
assert(MBB->isEHPad() == IsEHPad);
BlockChain &SuccChain = *BlockToChain[MBB];
if (&SuccChain == &Chain)
continue;
@ -544,11 +562,32 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
DEBUG(dbgs() << " " << getBlockName(MBB) << " -> ";
MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
if (BestBlock && BestFreq >= CandidateFreq)
// For ehpad, we layout the least probable first as to avoid jumping back
// from least probable landingpads to more probable ones.
//
// FIXME: Using probability is probably (!) not the best way to achieve
// this. We should probably have a more principled approach to layout
// cleanup code.
//
// The goal is to get:
//
// +--------------------------+
// | V
// InnerLp -> InnerCleanup OuterLp -> OuterCleanup -> Resume
//
// Rather than:
//
// +-------------------------------------+
// V |
// OuterLp -> OuterCleanup -> Resume InnerLp -> InnerCleanup
if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
continue;
BestBlock = MBB;
BestFreq = CandidateFreq;
}
return BestBlock;
}
@ -582,6 +621,7 @@ void MachineBlockPlacement::fillWorkLists(
MachineBasicBlock *MBB,
SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
SmallVectorImpl<MachineBasicBlock *> &EHPadWorkList,
const BlockFilterSet *BlockFilter = nullptr) {
BlockChain &Chain = *BlockToChain[MBB];
if (!UpdatedPreds.insert(&Chain).second)
@ -599,13 +639,20 @@ void MachineBlockPlacement::fillWorkLists(
}
}
if (Chain.UnscheduledPredecessors == 0)
BlockWorkList.push_back(*Chain.begin());
if (Chain.UnscheduledPredecessors != 0)
return;
MBB = *Chain.begin();
if (MBB->isEHPad())
EHPadWorkList.push_back(MBB);
else
BlockWorkList.push_back(MBB);
}
void MachineBlockPlacement::buildChain(
MachineBasicBlock *BB, BlockChain &Chain,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
SmallVectorImpl<MachineBasicBlock *> &EHPadWorkList,
const BlockFilterSet *BlockFilter) {
assert(BB);
assert(BlockToChain[BB] == &Chain);
@ -613,7 +660,8 @@ void MachineBlockPlacement::buildChain(
MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
MachineBasicBlock *LoopHeaderBB = BB;
markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, EHPadWorkList,
BlockFilter);
BB = *std::prev(Chain.end());
for (;;) {
assert(BB);
@ -629,6 +677,8 @@ void MachineBlockPlacement::buildChain(
// this point. This won't be a fallthrough, but it will increase locality.
if (!BestSucc)
BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
if (!BestSucc)
BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
if (!BestSucc) {
BestSucc =
@ -647,7 +697,8 @@ void MachineBlockPlacement::buildChain(
SuccChain.UnscheduledPredecessors = 0;
DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to "
<< getBlockName(BestSucc) << "\n");
markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, EHPadWorkList,
BlockFilter);
Chain.merge(BestSucc, &SuccChain);
BB = *std::prev(Chain.end());
}
@ -1067,6 +1118,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
buildLoopChains(F, *InnerLoop);
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
SmallVector<MachineBasicBlock *, 16> EHPadWorkList;
BlockFilterSet LoopBlockSet = collectLoopBlockSet(F, L);
// Check if we have profile data for this function. If yes, we will rotate
@ -1100,9 +1152,10 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
UpdatedPreds.insert(&LoopChain);
for (MachineBasicBlock *LoopBB : LoopBlockSet)
fillWorkLists(LoopBB, UpdatedPreds, BlockWorkList, &LoopBlockSet);
fillWorkLists(LoopBB, UpdatedPreds, BlockWorkList, EHPadWorkList,
&LoopBlockSet);
buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet);
buildChain(LoopTop, LoopChain, BlockWorkList, EHPadWorkList, &LoopBlockSet);
if (RotateLoopWithProfile)
rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
@ -1199,13 +1252,14 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
buildLoopChains(F, *L);
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
SmallVector<MachineBasicBlock *, 16> EHPadWorkList;
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
for (MachineBasicBlock &MBB : F)
fillWorkLists(&MBB, UpdatedPreds, BlockWorkList);
fillWorkLists(&MBB, UpdatedPreds, BlockWorkList, EHPadWorkList);
BlockChain &FunctionChain = *BlockToChain[&F.front()];
buildChain(&F.front(), FunctionChain, BlockWorkList);
buildChain(&F.front(), FunctionChain, BlockWorkList, EHPadWorkList);
#ifndef NDEBUG
typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;

View File

@ -1,52 +0,0 @@
; RUN: llc < %s | FileCheck %s
; CHECK: .long .Ltmp9-.Ltmp8 # Call between .Ltmp8 and .Ltmp9
; We used to crash in filetype=obj when computing a negative value.
; RUN: llc -filetype=obj < %s
target triple = "powerpc--netbsd"
@_ZTI1I = external constant { i8*, i8* }
define void @f(i8 %foo, i32 %bar) personality i8* bitcast (void ()* @g to i8*) {
invoke void @g()
to label %try.cont unwind label %lpad
lpad: ; preds = %0
%tmp = landingpad { i8*, i32 }
catch i8* bitcast ({ i8*, i8* }* @_ZTI1I to i8*)
br i1 undef, label %catch10, label %catch
catch10: ; preds = %lpad
%tmp8 = load i32, i32* undef, align 4
%conv.i.i = zext i8 %foo to i32
%cond.i.i = select i1 undef, i32 %conv.i.i, i32 %tmp8
invoke void @_Z24__put_character_sequenceIccEvR1AIT_T0_Ej(i32 %cond.i.i)
to label %invoke.cont20 unwind label %lpad15
invoke.cont20: ; preds = %catch10
ret void
try.cont: ; preds = %0
ret void
catch: ; preds = %lpad
%tmp14 = load i32, i32* undef, align 4
%conv.i.i34 = zext i8 %foo to i32
%cond.i.i35 = select i1 undef, i32 %conv.i.i34, i32 %tmp14
invoke void @_Z24__put_character_sequenceIccEvR1AIT_T0_Ej(i32 %cond.i.i35)
to label %invoke.cont8 unwind label %lpad3
invoke.cont8: ; preds = %call2.i.i.noexc36
ret void
lpad3: ; preds = %call2.i.i.noexc36, %catch
%tmp16 = landingpad { i8*, i32 }
cleanup
invoke void @g()
to label %eh.resume unwind label %terminate.lpad
lpad15: ; preds = %catch10
%tmp19 = landingpad { i8*, i32 }
cleanup
invoke void @g()
to label %eh.resume unwind label %terminate.lpad
eh.resume: ; preds = %lpad15, %lpad3
ret void
terminate.lpad: ; preds = %lpad15, %lpad3
%tmp22 = landingpad { i8*, i32 }
catch i8* null
ret void
}
declare void @g()
declare void @_Z24__put_character_sequenceIccEvR1AIT_T0_Ej(i32)

View File

@ -1083,3 +1083,96 @@ exit:
%ret = phi i32 [ %val1, %then ], [ %val2, %else ]
ret i32 %ret
}
; Make sure we put landingpads out of the way.
declare i32 @pers(...)
declare i32 @foo();
declare i32 @bar();
define i32 @test_lp(i32 %a) personality i32 (...)* @pers {
; CHECK-LABEL: test_lp:
; CHECK: %entry
; CHECK: %hot
; CHECK: %then
; CHECK: %cold
; CHECK: %coldlp
; CHECK: %hotlp
; CHECK: %lpret
entry:
%0 = icmp sgt i32 %a, 1
br i1 %0, label %hot, label %cold, !prof !4
hot:
%1 = invoke i32 @foo()
to label %then unwind label %hotlp
cold:
%2 = invoke i32 @bar()
to label %then unwind label %coldlp
then:
%3 = phi i32 [ %1, %hot ], [ %2, %cold ]
ret i32 %3
hotlp:
%4 = landingpad { i8*, i32 }
cleanup
br label %lpret
coldlp:
%5 = landingpad { i8*, i32 }
cleanup
br label %lpret
lpret:
%6 = phi i32 [-1, %hotlp], [-2, %coldlp]
%7 = add i32 %6, 42
ret i32 %7
}
!4 = !{!"branch_weights", i32 65536, i32 0}
; Make sure that ehpad are scheduled from the least probable one
; to the most probable one. See selectBestCandidateBlock as to why.
declare void @clean();
define void @test_flow_unwind() personality i32 (...)* @pers {
; CHECK-LABEL: test_flow_unwind:
; CHECK: %entry
; CHECK: %then
; CHECK: %exit
; CHECK: %innerlp
; CHECK: %outerlp
; CHECK: %outercleanup
entry:
%0 = invoke i32 @foo()
to label %then unwind label %outerlp
then:
%1 = invoke i32 @bar()
to label %exit unwind label %innerlp
exit:
ret void
innerlp:
%2 = landingpad { i8*, i32 }
cleanup
br label %innercleanup
outerlp:
%3 = landingpad { i8*, i32 }
cleanup
br label %outercleanup
outercleanup:
%4 = phi { i8*, i32 } [%2, %innercleanup], [%3, %outerlp]
call void @clean()
resume { i8*, i32 } %4
innercleanup:
call void @clean()
br label %outercleanup
}

View File

@ -65,13 +65,13 @@ __try.cont:
; Landing pad code
; CHECK: [[handler0:LBB0_[0-9]+]]: # %handler0
; CHECK: [[handler1:LBB0_[0-9]+]]: # %handler1
; Restore SP
; CHECK: movl {{.*}}(%ebp), %esp
; CHECK: calll _puts
; CHECK: jmp [[cont_bb]]
; CHECK: [[handler1:LBB0_[0-9]+]]: # %handler1
; CHECK: [[handler0:LBB0_[0-9]+]]: # %handler0
; Restore SP
; CHECK: movl {{.*}}(%ebp), %esp
; CHECK: calll _puts

View File

@ -67,16 +67,16 @@ __try.cont:
; Landing pad code
; CHECK: [[handler0:\.LBB0_[0-9]+]]: # %handler0
; CHECK: callq puts
; CHECK: movl $-1, [[rloc]]
; CHECK: jmp [[cont_bb]]
; CHECK: [[handler1:\.LBB0_[0-9]+]]: # %handler1
; CHECK: callq puts
; CHECK: movl $-2, [[rloc]]
; CHECK: jmp [[cont_bb]]
; CHECK: [[handler0:\.LBB0_[0-9]+]]: # %handler0
; CHECK: callq puts
; CHECK: movl $-1, [[rloc]]
; CHECK: jmp [[cont_bb]]
; CHECK: .seh_handlerdata
; CHECK-NEXT: .Lsafe_div$parent_frame_offset
; CHECK-NEXT: .long (.Llsda_end0-.Llsda_begin0)/16

View File

@ -63,17 +63,17 @@ try.cont:
; X86: [[contbb:LBB0_[0-9]+]]: # %try.cont
; X86: retl
; X86: [[restorebb1:LBB0_[0-9]+]]: # Block address taken
; X86-NEXT: # %handler1
; X86-NEXT: addl $12, %ebp
; X86: jmp [[contbb]]
; FIXME: These should be de-duplicated.
; X86: [[restorebb2:LBB0_[0-9]+]]: # Block address taken
; X86-NEXT: # %handler2
; X86-NEXT: addl $12, %ebp
; X86: jmp [[contbb]]
; X86: [[restorebb1:LBB0_[0-9]+]]: # Block address taken
; X86-NEXT: # %handler1
; X86-NEXT: addl $12, %ebp
; X86: jmp [[contbb]]
; X86: "?catch$[[catch1bb:[0-9]+]]@?0?try_catch_catch@4HA":
; X86: LBB0_[[catch1bb]]: # %handler1{{$}}
; X86: pushl %ebp