forked from OSchip/llvm-project
RegisterCoalescer: Remove phi-only subranges when erasing identity copies
Undef subranges are not present in the live range values, except when they cross block boundaries. In this situation, a identity copy is inside a loop, and one of the lanes is undefined. It only appears alive inside the loop due to the copy. Once the copy was erased, it would leave behind a segment inside the loop body with no corresponding def anywhere in the program. When RenameIndependentSubregs processed this dummy interval, it would introduce a "Multiple connected components in live interval" verifier error when IMPLICIT_DEFs were added to the other two blocks. I believe there is a missing verifier check for this type of dummy interval. I have found additional cases from the same fundamental problem in other areas I haven't managed to fix yet (e.g. the commented out prune_subrange_phi_value_* cases).
This commit is contained in:
parent
f2966d17a2
commit
60eba8161b
|
@ -211,6 +211,18 @@ namespace {
|
|||
/// live interval update is costly.
|
||||
void lateLiveIntervalUpdate();
|
||||
|
||||
/// Check if the incoming value defined by a COPY at \p SLRQ in the subrange
|
||||
/// has no value defined in the predecessors. If the incoming value is the
|
||||
/// same as defined by the copy itself, the value is considered undefined.
|
||||
bool copyValueUndefInPredecessors(LiveRange &S,
|
||||
const MachineBasicBlock *MBB,
|
||||
LiveQueryResult SLRQ);
|
||||
|
||||
/// Set necessary undef flags on subregister uses after pruning out undef
|
||||
/// lane segments from the subrange.
|
||||
void setUndefOnPrunedSubRegUses(LiveInterval &LI, Register Reg,
|
||||
LaneBitmask PrunedLanes);
|
||||
|
||||
/// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
|
||||
/// src/dst of the copy instruction CopyMI. This returns true if the copy
|
||||
/// was successfully coalesced away. If it is not currently possible to
|
||||
|
@ -1809,6 +1821,49 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool RegisterCoalescer::copyValueUndefInPredecessors(
|
||||
LiveRange &S, const MachineBasicBlock *MBB, LiveQueryResult SLRQ) {
|
||||
for (const MachineBasicBlock *Pred : MBB->predecessors()) {
|
||||
SlotIndex PredEnd = LIS->getMBBEndIdx(Pred);
|
||||
if (VNInfo *V = S.getVNInfoAt(PredEnd.getPrevSlot())) {
|
||||
// If this is a self loop, we may be reading the same value.
|
||||
if (V->id != SLRQ.valueOutOrDead()->id)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
|
||||
Register Reg,
|
||||
LaneBitmask PrunedLanes) {
|
||||
// If we had other instructions in the segment reading the undef sublane
|
||||
// value, we need to mark them with undef.
|
||||
for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
|
||||
unsigned SubRegIdx = MO.getSubReg();
|
||||
if (SubRegIdx == 0 || MO.isUndef())
|
||||
continue;
|
||||
|
||||
LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
|
||||
SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
|
||||
for (LiveInterval::SubRange &S : LI.subranges()) {
|
||||
if (!S.liveAt(Pos) && (PrunedLanes & SubRegMask).any()) {
|
||||
MO.setIsUndef();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LI.removeEmptySubRanges();
|
||||
|
||||
// A def of a subregister may be a use of other register lanes. Replacing
|
||||
// such a def with a def of a different register will eliminate the use,
|
||||
// and may cause the recorded live range to be larger than the actual
|
||||
// liveness in the program IR.
|
||||
LIS->shrinkToUses(&LI);
|
||||
}
|
||||
|
||||
bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
|
||||
Again = false;
|
||||
LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
|
||||
|
@ -1868,16 +1923,35 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
|
|||
VNInfo *ReadVNI = LRQ.valueIn();
|
||||
assert(ReadVNI && "No value before copy and no <undef> flag.");
|
||||
assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
|
||||
LI.MergeValueNumberInto(DefVNI, ReadVNI);
|
||||
|
||||
// Track incoming undef lanes we need to eliminate from the subrange.
|
||||
LaneBitmask PrunedLanes;
|
||||
MachineBasicBlock *MBB = CopyMI->getParent();
|
||||
|
||||
// Process subregister liveranges.
|
||||
for (LiveInterval::SubRange &S : LI.subranges()) {
|
||||
LiveQueryResult SLRQ = S.Query(CopyIdx);
|
||||
if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
|
||||
VNInfo *SReadVNI = SLRQ.valueIn();
|
||||
S.MergeValueNumberInto(SDefVNI, SReadVNI);
|
||||
if (VNInfo *SReadVNI = SLRQ.valueIn())
|
||||
SDefVNI = S.MergeValueNumberInto(SDefVNI, SReadVNI);
|
||||
|
||||
// If this copy introduced an undef subrange from an incoming value,
|
||||
// we need to eliminate the undef live in values from the subrange.
|
||||
if (copyValueUndefInPredecessors(S, MBB, SLRQ)) {
|
||||
LLVM_DEBUG(dbgs() << "Incoming sublane value is undef at copy\n");
|
||||
PrunedLanes |= S.LaneMask;
|
||||
S.removeValNo(SDefVNI);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LI.MergeValueNumberInto(DefVNI, ReadVNI);
|
||||
if (PrunedLanes.any()) {
|
||||
LLVM_DEBUG(dbgs() << "Pruning undef incoming lanes: "
|
||||
<< PrunedLanes << '\n');
|
||||
setUndefOnPrunedSubRegUses(LI, CP.getSrcReg(), PrunedLanes);
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
|
||||
}
|
||||
deleteInstr(CopyMI);
|
||||
|
|
|
@ -0,0 +1,338 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-coalescing -verify-machineinstrs -start-before=simple-register-coalescing -stop-after=machine-scheduler -o - %s | FileCheck %s
|
||||
|
||||
# Tests that break due to the handling of partially undef registers
|
||||
# when whole register identity copies are erased.
|
||||
|
||||
# Make sure there is no verifier error after
|
||||
# RenameIndepependentSubregs processes this. The coalescer would
|
||||
# remove the identity copy in %bb.1, and leave behind a dummy interval
|
||||
# across bb.1 with no corresponding value anywhere in the function.
|
||||
|
||||
---
|
||||
name: identity_copy_undef_subrange
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: identity_copy_undef_subrange
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK: undef %0.sub1:vreg_64 = COPY $vgpr0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
; CHECK: S_BRANCH %bb.2
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: undef %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 %0.sub1, implicit $mode, implicit $exec
|
||||
; CHECK: S_BRANCH %bb.1
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
undef %0.sub1:vreg_64 = COPY killed $vgpr0
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2, %bb.1
|
||||
|
||||
%0:vreg_64 = COPY %0
|
||||
S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
undef %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 killed %0.sub1, implicit $mode, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
...
|
||||
|
||||
# Test another use of the register inside the block.
|
||||
---
|
||||
name: identity_copy_undef_subrange_other_uses0
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: identity_copy_undef_subrange_other_uses0
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK: undef %0.sub1:vreg_64 = COPY $vgpr0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: S_NOP 0, implicit undef %0.sub0
|
||||
; CHECK: S_NOP 0, implicit undef %0.sub0
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
; CHECK: S_BRANCH %bb.2
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: undef %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 %0.sub1, implicit $mode, implicit $exec
|
||||
; CHECK: S_BRANCH %bb.1
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
undef %0.sub1:vreg_64 = COPY killed $vgpr0
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2, %bb.1
|
||||
|
||||
%0:vreg_64 = COPY %0
|
||||
S_NOP 0, implicit %0.sub0
|
||||
S_NOP 0, implicit %0.sub0
|
||||
S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
undef %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 killed %0.sub1, implicit $mode, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: second_identity_copy
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: second_identity_copy
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK: undef %0.sub1:vreg_64 = COPY $vgpr0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
; CHECK: S_BRANCH %bb.2
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: undef %0.sub1:vreg_64 = nofpexcept V_MUL_F32_e32 0, %0.sub1, implicit $mode, implicit $exec
|
||||
; CHECK: S_BRANCH %bb.1
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
undef %0.sub1:vreg_64 = COPY killed $vgpr0
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2, %bb.1
|
||||
|
||||
%0:vreg_64 = COPY killed %0
|
||||
%0:vreg_64 = COPY %0
|
||||
S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
undef %0.sub1:vreg_64 = nofpexcept V_MUL_F32_e32 0, killed %0.sub1, implicit $mode, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: second_identity_copy_undef_lane_outblock
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: second_identity_copy_undef_lane_outblock
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK: undef %0.sub1:vreg_64 = COPY $vgpr0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
; CHECK: S_BRANCH %bb.2
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: %0.sub1:vreg_64 = nofpexcept V_MUL_F32_e32 0, %0.sub1, implicit $mode, implicit $exec
|
||||
; CHECK: S_BRANCH %bb.1
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
undef %0.sub1:vreg_64 = COPY killed $vgpr0
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2, %bb.1
|
||||
|
||||
%0:vreg_64 = COPY killed %0
|
||||
%0:vreg_64 = COPY %0
|
||||
S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
%0.sub1:vreg_64 = nofpexcept V_MUL_F32_e32 0, killed %0.sub1, implicit $mode, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
...
|
||||
|
||||
# The same value number appears in multiple blocks
|
||||
---
|
||||
name: identity_copy_undef_subrange_null_vninfo_to_remove
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: identity_copy_undef_subrange_null_vninfo_to_remove
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK: undef %0.sub1:vreg_64 = COPY $vgpr0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.3, implicit $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: S_NOP 0, implicit undef %0.sub0
|
||||
; CHECK: undef %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 %0.sub1, implicit $mode, implicit $exec
|
||||
; CHECK: S_BRANCH %bb.1
|
||||
; CHECK: bb.3:
|
||||
; CHECK: S_NOP 0, implicit undef %0.sub0
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
undef %0.sub1:vreg_64 = COPY killed $vgpr0
|
||||
|
||||
bb.1:
|
||||
%0:vreg_64 = COPY %0
|
||||
S_CBRANCH_EXECNZ %bb.3, implicit $exec
|
||||
|
||||
bb.2:
|
||||
S_NOP 0, implicit %0.sub0
|
||||
undef %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 killed %0.sub1, implicit $mode, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.3:
|
||||
S_NOP 0, implicit %0.sub0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: undef_copy_self_loop0
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: undef_copy_self_loop0
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK: undef %0.sub1:vreg_64 = COPY $vgpr0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: S_NOP 0, implicit undef %0.sub0
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
undef %0.sub1:vreg_64 = COPY killed $vgpr0
|
||||
|
||||
bb.1:
|
||||
%0:vreg_64 = COPY %0
|
||||
S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
|
||||
bb.2:
|
||||
S_NOP 0, implicit %0.sub0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: undef_copy_self_loop1
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: undef_copy_self_loop1
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK: undef %0.sub1:vreg_64 = COPY $vgpr0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: S_NOP 0, implicit %0.sub1
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
undef %0.sub1:vreg_64 = COPY killed $vgpr0
|
||||
|
||||
bb.1:
|
||||
%0:vreg_64 = COPY %0
|
||||
S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
|
||||
bb.2:
|
||||
S_NOP 0, implicit %0.sub1
|
||||
|
||||
...
|
||||
|
||||
# FIXME: This testcase is broken
|
||||
# The coalescing of the %0 = %2 COPY in %bb.2 needs to prune the dead
|
||||
# phi range across %bb.1 after it is erased.
|
||||
# ---
|
||||
# name: prune_subrange_phi_value_0
|
||||
# tracksRegLiveness: true
|
||||
# body: |
|
||||
# bb.0:
|
||||
# liveins: $vgpr0
|
||||
|
||||
# undef %0.sub1:vreg_64 = COPY killed $vgpr0
|
||||
|
||||
# bb.1:
|
||||
# successors: %bb.2, %bb.1
|
||||
|
||||
# %1:vreg_64 = COPY killed %0
|
||||
# %0:vreg_64 = COPY %1
|
||||
# S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
# S_BRANCH %bb.2
|
||||
|
||||
# bb.2:
|
||||
# undef %2.sub1:vreg_64 = COPY %0.sub1
|
||||
# %0:vreg_64 = COPY killed %2
|
||||
# S_BRANCH %bb.1
|
||||
|
||||
# ...
|
||||
|
||||
# Variant of testcase that asserts since there wasn't already an
|
||||
# incoming segment at the erased copy, and no valid end point.
|
||||
# ---
|
||||
# name: prune_subrange_phi_value_1
|
||||
# tracksRegLiveness: true
|
||||
# body: |
|
||||
# bb.0:
|
||||
# liveins: $vgpr0
|
||||
|
||||
# undef %0.sub1:vreg_64 = COPY killed $vgpr0
|
||||
|
||||
# bb.1:
|
||||
# successors: %bb.2, %bb.1
|
||||
|
||||
# %0:vreg_64 = COPY killed %0
|
||||
# S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
# S_BRANCH %bb.2
|
||||
|
||||
# bb.2:
|
||||
# undef %1.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 killed %0.sub1, implicit $mode, implicit $exec
|
||||
# %0:vreg_64 = COPY killed %1
|
||||
# S_BRANCH %bb.1
|
||||
|
||||
# ...
|
||||
|
||||
---
|
||||
name: prune_subrange_phi_value_2
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: prune_subrange_phi_value_2
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK: undef %0.sub1:vreg_64 = COPY $vgpr0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
; CHECK: S_BRANCH %bb.2
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 %0.sub1, implicit $mode, implicit $exec
|
||||
; CHECK: S_BRANCH %bb.1
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
undef %0.sub1:vreg_64 = COPY killed $vgpr0
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2, %bb.1
|
||||
|
||||
%0:vreg_64 = COPY killed %0
|
||||
S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
%0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 %0.sub1, implicit $mode, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
...
|
Loading…
Reference in New Issue