[BranchFolding] Fix PR43964 about branch folder not being debug invariant

Summary:
The fix in BranchFolder related to non debug invariant problems
done in commit ec32dff0b0 actually introduced some new
problems with debug invariance.

Before that patch ComputeCommonTailLength would move iterators
back, past debug instructions, in order to make ProfitableToMerge
make consistent answers "when one block differs from the other
only by whether debugging pseudos are present at the beginning".
But the changes in ec32dff0b0 undid that by moving the iterators
forward again.

This patch refactors ComputeCommonTailLength. The function was
really complex, considering that the SkipTopCFIAndReturn part
always moved the iterators forward to the first "real" instruction
in the found tail after ec32dff0b0.

The patch also restores the logic to "back past possible debugging
pseudos at beginning of block" to make sure ProfitableToMerge
gives consistent answers independent of DBG_VALUE instructions
before the tail. That is now done by ProfitableToMerge instead of
being hidden as a side-effect in ComputeCommonTailLength.

Reviewers: probinson, yechunliang, jmorse

Reviewed By: jmorse

Subscribers: Orlando, mehdi_amini, dexonsmith, aprantl, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70091
This commit is contained in:
Bjorn Pettersson 2019-11-11 18:20:40 +01:00
parent c4afc6566a
commit 898de30291
2 changed files with 183 additions and 95 deletions

View File

@ -307,109 +307,51 @@ static bool countsAsInstruction(const MachineInstr &MI) {
return !(MI.isDebugInstr() || MI.isCFIInstruction());
}
/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
/// of instructions they actually have in common together at their end. Return
/// iterators for the first shared instruction in each block.
/// Iterate backwards from the given iterator \p I, towards the beginning of the
/// block. If a MI satisfying 'countsAsInstruction' is found, return an iterator
/// pointing to that MI. If no such MI is found, return the end iterator.
static MachineBasicBlock::iterator
skipBackwardPastNonInstructions(MachineBasicBlock::iterator I,
MachineBasicBlock *MBB) {
while (I != MBB->begin()) {
--I;
if (countsAsInstruction(*I))
return I;
}
return MBB->end();
}
/// Given two machine basic blocks, return the number of instructions they
/// actually have in common together at their end. If a common tail is found (at
/// least by one instruction), then iterators for the first shared instruction
/// in each block are returned as well.
///
/// Non-instructions according to countsAsInstruction are ignored.
static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
MachineBasicBlock *MBB2,
MachineBasicBlock::iterator &I1,
MachineBasicBlock::iterator &I2) {
I1 = MBB1->end();
I2 = MBB2->end();
MachineBasicBlock::iterator MBBI1 = MBB1->end();
MachineBasicBlock::iterator MBBI2 = MBB2->end();
unsigned TailLen = 0;
while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
--I1; --I2;
// Skip debugging pseudos; necessary to avoid changing the code.
while (!countsAsInstruction(*I1)) {
if (I1==MBB1->begin()) {
while (!countsAsInstruction(*I2)) {
if (I2==MBB2->begin()) {
// I1==DBG at begin; I2==DBG at begin
goto SkipTopCFIAndReturn;
}
--I2;
}
++I2;
// I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin
goto SkipTopCFIAndReturn;
}
--I1;
}
// I1==first (untested) non-DBG preceding known match
while (!countsAsInstruction(*I2)) {
if (I2==MBB2->begin()) {
++I1;
// I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin
goto SkipTopCFIAndReturn;
}
--I2;
}
// I1, I2==first (untested) non-DBGs preceding known match
if (!I1->isIdenticalTo(*I2) ||
while (true) {
MBBI1 = skipBackwardPastNonInstructions(MBBI1, MBB1);
MBBI2 = skipBackwardPastNonInstructions(MBBI2, MBB2);
if (MBBI1 == MBB1->end() || MBBI2 == MBB2->end())
break;
if (!MBBI1->isIdenticalTo(*MBBI2) ||
// FIXME: This check is dubious. It's used to get around a problem where
// people incorrectly expect inline asm directives to remain in the same
// relative order. This is untenable because normal compiler
// optimizations (like this one) may reorder and/or merge these
// directives.
I1->isInlineAsm()) {
++I1; ++I2;
MBBI1->isInlineAsm()) {
break;
}
++TailLen;
}
// Back past possible debugging pseudos at beginning of block. This matters
// when one block differs from the other only by whether debugging pseudos
// are present at the beginning. (This way, the various checks later for
// I1==MBB1->begin() work as expected.)
if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
--I2;
while (I2->isDebugInstr()) {
if (I2 == MBB2->begin())
return TailLen;
--I2;
}
++I2;
}
if (I2 == MBB2->begin() && I1 != MBB1->begin()) {
--I1;
while (I1->isDebugInstr()) {
if (I1 == MBB1->begin())
return TailLen;
--I1;
}
++I1;
}
SkipTopCFIAndReturn:
// Ensure that I1 and I2 do not point to a CFI_INSTRUCTION. This can happen if
// I1 and I2 are non-identical when compared and then one or both of them ends
// up pointing to a CFI instruction after being incremented. For example:
/*
BB1:
...
INSTRUCTION_A
ADD32ri8 <- last common instruction
...
BB2:
...
INSTRUCTION_B
CFI_INSTRUCTION
ADD32ri8 <- last common instruction
...
*/
// When INSTRUCTION_A and INSTRUCTION_B are compared as not equal, after
// incrementing the iterators, I1 will point to ADD, however I2 will point to
// the CFI instruction. Later on, this leads to BB2 being 'hacked off' at the
// wrong place (in ReplaceTailWithBranchTo()) which results in losing this CFI
// instruction.
// Skip CFI_INSTRUCTION and debugging instruction; necessary to avoid changing the code.
while (I1 != MBB1->end() && !countsAsInstruction(*I1)) {
++I1;
}
while (I2 != MBB2->end() && !countsAsInstruction(*I2)) {
++I2;
I1 = MBBI1;
I2 = MBBI2;
}
return TailLen;
@ -661,6 +603,17 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
<< " and " << printMBBReference(*MBB2) << " is "
<< CommonTailLen << '\n');
// Move the iterators to the beginning of the MBB if we only got debug
// instructions before the tail. This is to avoid splitting a block when we
// only got debug instructions before the tail (to be invariant on -g).
if (skipDebugInstructionsForward(MBB1->begin(), MBB1->end()) == I1)
I1 = MBB1->begin();
if (skipDebugInstructionsForward(MBB2->begin(), MBB2->end()) == I2)
I2 = MBB2->begin();
bool FullBlockTail1 = I1 == MBB1->begin();
bool FullBlockTail2 = I2 == MBB2->begin();
// It's almost always profitable to merge any number of non-terminator
// instructions with the block that falls through into the common successor.
// This is true only for a single successor. For multiple successors, we are
@ -679,7 +632,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// are unlikely to become a fallthrough target after machine block placement.
// Tail merging these blocks is unlikely to create additional unconditional
// branches, and will reduce the size of this cold code.
if (I1 == MBB1->begin() && I2 == MBB2->begin() &&
if (FullBlockTail1 && FullBlockTail2 &&
blockEndsInUnreachable(MBB1) && blockEndsInUnreachable(MBB2))
return true;
@ -687,16 +640,16 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// a position where the other could fall through into it, merge any number
// of instructions, because it can be done without a branch.
// TODO: If the blocks are not adjacent, move one of them so that they are?
if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin())
if (MBB1->isLayoutSuccessor(MBB2) && FullBlockTail2)
return true;
if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
if (MBB2->isLayoutSuccessor(MBB1) && FullBlockTail1)
return true;
// If both blocks are identical and end in a branch, merge them unless they
// both have a fallthrough predecessor and successor.
// We can only do this after block placement because it depends on whether
// there are fallthroughs, and we don't know until after layout.
if (AfterPlacement && I1 == MBB1->begin() && I2 == MBB2->begin()) {
if (AfterPlacement && FullBlockTail1 && FullBlockTail2) {
auto BothFallThrough = [](MachineBasicBlock *MBB) {
if (MBB->succ_size() != 0 && !MBB->canFallThrough())
return false;
@ -730,7 +683,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// instructions that would be deleted in the merge.
MachineFunction *MF = MBB1->getParent();
return EffectiveTailLen >= 2 && MF->getFunction().hasOptSize() &&
(I1 == MBB1->begin() || I2 == MBB2->begin());
(FullBlockTail1 || FullBlockTail2);
}
unsigned BranchFolder::ComputeSameTails(unsigned CurHash,

View File

@ -0,0 +1,135 @@
# RUN: llc -mtriple=x86_64-- -run-pass branch-folder -O3 -o - %s | FileCheck %s
---
name: test1a
body: |
; CHECK-LABEL: name: test1a
; CHECK: bb.0:
; CHECK: TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
; CHECK: JCC_1 %bb.2, 5, implicit $eflags
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: MOV8mi $r12, 1, $noreg, 0, $noreg, 0
; CHECK-NOT: RET
; CHECK: bb.2:
; CHECK: MOV8mi $r13, 1, $noreg, 0, $noreg, 0
; CHECK: RET 0
bb.0:
TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
JCC_1 %bb.2, 5, implicit killed $eflags
bb.1:
MOV8mi $r12, 1, $noreg, 0, $noreg, 0
MOV8mi $r13, 1, $noreg, 0, $noreg, 0
RET 0
bb.2:
MOV8mi $r13, 1, $noreg, 0, $noreg, 0
RET 0
...
---
name: test1b
body: |
; Verify that we get the same rewrites as in test1a when adding some
; DBG_VALUE instructions in the mix.
;
; CHECK-LABEL: name: test1b
; CHECK: bb.0:
; CHECK: TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
; CHECK: JCC_1 %bb.2, 5, implicit $eflags
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: MOV8mi $r12, 1, $noreg, 0, $noreg, 0
; CHECK-NOT: RET
; CHECK: bb.2:
; CHECK: DBG_VALUE
; CHECK: DBG_VALUE
; CHECK: MOV8mi $r13, 1, $noreg, 0, $noreg, 0
; CHECK: RET 0
bb.0:
TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
JCC_1 %bb.2, 5, implicit killed $eflags
bb.1:
MOV8mi $r12, 1, $noreg, 0, $noreg, 0
MOV8mi $r13, 1, $noreg, 0, $noreg, 0
RET 0
bb.2:
DBG_VALUE
DBG_VALUE
MOV8mi $r13, 1, $noreg, 0, $noreg, 0
RET 0
...
---
name: test2a
body: |
; CFI instruction currently prevents the rewrite here (although technically
; I suppose that branch folding could let bb.1 fallthrough into bb.2 here).
;
; CHECK-LABEL: name: test2a
; CHECK: bb.0:
; CHECK: TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
; CHECK: JCC_1 %bb.2, 5, implicit killed $eflags
; CHECK: bb.1:
; CHECK: MOV8mi $r12, 1, $noreg, 0, $noreg, 0
; CHECK: MOV8mi $r13, 1, $noreg, 0, $noreg, 0
; CHECK: RET 0
; CHECK: bb.2:
; CHECK: CFI_INSTRUCTION def_cfa_offset 8
; CHECK: MOV8mi $r13, 1, $noreg, 0, $noreg, 0
; CHECK: RET 0
bb.0:
TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
JCC_1 %bb.2, 5, implicit killed $eflags
bb.1:
MOV8mi $r12, 1, $noreg, 0, $noreg, 0
MOV8mi $r13, 1, $noreg, 0, $noreg, 0
RET 0
bb.2:
CFI_INSTRUCTION def_cfa_offset 8
MOV8mi $r13, 1, $noreg, 0, $noreg, 0
RET 0
...
---
name: test2b
body: |
; Verify that we get the same rewrites as in test1a when adding some
; DBG_VALUE instructions in the mix.
;
; CHECK-LABEL: name: test2b
; CHECK: bb.0:
; CHECK: TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
; CHECK: JCC_1 %bb.2, 5, implicit killed $eflags
; CHECK: bb.1:
; CHECK: MOV8mi $r12, 1, $noreg, 0, $noreg, 0
; CHECK: MOV8mi $r13, 1, $noreg, 0, $noreg, 0
; CHECK: RET 0
; CHECK: bb.2:
; CHECK: DBG_VALUE
; CHECK: CFI_INSTRUCTION def_cfa_offset 8
; CHECK: DBG_VALUE
; CHECK: MOV8mi $r13, 1, $noreg, 0, $noreg, 0
; CHECK: RET 0
bb.0:
TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
JCC_1 %bb.2, 5, implicit killed $eflags
bb.1:
MOV8mi $r12, 1, $noreg, 0, $noreg, 0
MOV8mi $r13, 1, $noreg, 0, $noreg, 0
RET 0
bb.2:
DBG_VALUE
CFI_INSTRUCTION def_cfa_offset 8
DBG_VALUE
MOV8mi $r13, 1, $noreg, 0, $noreg, 0
RET 0
...