GlobalISel: Fix legalization error where CSE leaves behind dead defs

If the conversion artifact introduced in the unmerge of cast of merge
combine already existed in the function, this would introduce dead
copies which kept the old casts around, neither of which were deleted,
and would fail legalization.

This would fail as follows:

The G_UNMERGE_VALUES of the G_SEXT of the G_BUILD_VECTOR would
introduce a G_SEXT for each of the scalars.

Some of the required G_SEXTs already existed in the function, so CSE
moves them up in the function and introduces a copy to the original
result register.

The introduced CSE copies are dead, since the originally G_SEXTs were
already directly used. These copies add a use to the illegal G_SEXTs,
so they are not deleted.

The artifact combiner does not see the defs that need to be updated,
since it was hidden inside the CSE builder.

I see 2 potential fixes, and opted for the mechanically simpler one,
which is to just not insert the cast if the result operand isn't
used. Alternatively, we could not insert the cast directly into the
result register, and use replaceRegOrBuildCopy similar to the case
where there is no conversion.

I suspect this is a wider problem in the artifact combiner.
This commit is contained in:
Matt Arsenault 2022-01-07 17:44:23 -05:00
parent 58cb303777
commit 42098c4a30
2 changed files with 120 additions and 3 deletions

View File

@ -977,10 +977,13 @@ public:
Builder.setInstr(MI); Builder.setInstr(MI);
for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();
Register DefReg = MI.getOperand(Idx).getReg(); Register DefReg = MI.getOperand(Idx).getReg();
Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc}); Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();
UpdatedDefs.push_back(DefReg);
if (!MRI.use_empty(DefReg)) {
Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc});
UpdatedDefs.push_back(DefReg);
}
} }
markInstAndDefDead(MI, *MergeI, DeadInsts); markInstAndDefDead(MI, *MergeI, DeadInsts);

View File

@ -0,0 +1,114 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck %s
# The G_UNMERGE_VALUES of the G_SEXT of G_BUILD_VECTOR will introduce
# a new G_SEXT for each of the scalars. The sext of %and[4-7] already
# exist, so the CSE MIR builder in the artifact combiner would re-use
# those instructions and introduce dead copies which were never
# deleted, and also kept the illegal %sext[4-7] alive which would fail
# legalization.
---
name: artifact_combiner_sext_already_exists
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: artifact_combiner_sext_already_exists
; CHECK: %undef:_(p4) = G_IMPLICIT_DEF
; CHECK-NEXT: %load:_(s32) = G_LOAD %undef(p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4)
; CHECK-NEXT: %unmerge3_0:_(s1) = G_TRUNC %load(s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR %load, [[C]](s32)
; CHECK-NEXT: %unmerge3_1:_(s1) = G_TRUNC [[LSHR]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR %load, [[C1]](s32)
; CHECK-NEXT: %unmerge3_2:_(s1) = G_TRUNC [[LSHR1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR %load, [[C2]](s32)
; CHECK-NEXT: %unmerge3_3:_(s1) = G_TRUNC [[LSHR2]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR %load, [[C3]](s32)
; CHECK-NEXT: %unmerge3_4:_(s1) = G_TRUNC [[LSHR3]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR %load, [[C4]](s32)
; CHECK-NEXT: %unmerge3_5:_(s1) = G_TRUNC [[LSHR4]](s32)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR %load, [[C5]](s32)
; CHECK-NEXT: %unmerge3_6:_(s1) = G_TRUNC [[LSHR5]](s32)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR %load, [[C6]](s32)
; CHECK-NEXT: %unmerge3_7:_(s1) = G_TRUNC [[LSHR6]](s32)
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK-NEXT: %negone:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: %and0:_(s1) = G_XOR %unmerge3_0, %negone
; CHECK-NEXT: %and1:_(s1) = G_XOR %unmerge3_1, %negone
; CHECK-NEXT: %and2:_(s1) = G_XOR %unmerge3_2, %negone
; CHECK-NEXT: %and3:_(s1) = G_XOR %unmerge3_3, %negone
; CHECK-NEXT: %and4:_(s1) = G_XOR %unmerge3_4, %negone
; CHECK-NEXT: %and5:_(s1) = G_XOR %unmerge3_5, %negone
; CHECK-NEXT: %and6:_(s1) = G_XOR %unmerge3_6, %negone
; CHECK-NEXT: %and7:_(s1) = G_XOR %unmerge3_7, %negone
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT %and0(s1)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C10]]
; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT %and1(s1)
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT1]], [[C10]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT %and2(s1)
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT2]], [[C10]]
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C8]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT %and3(s1)
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT3]], [[C10]]
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32)
; CHECK-NEXT: %merge0:_(s32) = G_OR [[OR1]], [[SHL2]]
; CHECK-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT %and4(s1)
; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT4]], [[C10]]
; CHECK-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT %and5(s1)
; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT5]], [[C10]]
; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
; CHECK-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT %and6(s1)
; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SEXT6]], [[C10]]
; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C8]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL4]]
; CHECK-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT %and7(s1)
; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SEXT7]], [[C10]]
; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32)
; CHECK-NEXT: %merge1:_(s32) = G_OR [[OR3]], [[SHL5]]
; CHECK-NEXT: %bv:_(<2 x s32>) = G_BUILD_VECTOR %merge0(s32), %merge1(s32)
; CHECK-NEXT: %null:_(p1) = G_CONSTANT i64 0
; CHECK-NEXT: G_STORE %bv(<2 x s32>), %null(p1) :: (store (<2 x s32>), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%undef:_(p4) = G_IMPLICIT_DEF
%load:_(s32) = G_LOAD %undef :: (dereferenceable invariant load (s8), align 16, addrspace 4)
%trunc:_(s8) = G_TRUNC %load
%unmerge3_0:_(s1), %unmerge3_1:_(s1), %unmerge3_2:_(s1), %unmerge3_3:_(s1), %unmerge3_4:_(s1), %unmerge3_5:_(s1), %unmerge3_6:_(s1), %unmerge3_7:_(s1) = G_UNMERGE_VALUES %trunc
%negone:_(s1) = G_CONSTANT i1 true
%and0:_(s1) = G_XOR %unmerge3_0, %negone
%and1:_(s1) = G_XOR %unmerge3_1, %negone
%and2:_(s1) = G_XOR %unmerge3_2, %negone
%and3:_(s1) = G_XOR %unmerge3_3, %negone
%and4:_(s1) = G_XOR %unmerge3_4, %negone
%and5:_(s1) = G_XOR %unmerge3_5, %negone
%and6:_(s1) = G_XOR %unmerge3_6, %negone
%and7:_(s1) = G_XOR %unmerge3_7, %negone
%boolvec:_(<8 x s1>) = G_BUILD_VECTOR %and0(s1), %and1(s1), %and2(s1), %and3(s1), %and4(s1), %and5(s1), %and6(s1), %and7(s1)
%sext:_(<8 x s8>) = G_SEXT %boolvec(<8 x s1>)
%sext_lo:_(<4 x s8>), %sext_hi:_(<4 x s8>) = G_UNMERGE_VALUES %sext(<8 x s8>)
%sext0:_(s8), %sext1:_(s8), %sext2:_(s8), %sext3:_(s8) = G_UNMERGE_VALUES %sext_lo(<4 x s8>)
%merge0:_(s32) = G_MERGE_VALUES %sext0(s8), %sext1(s8), %sext2(s8), %sext3(s8)
%sext4:_(s8) = G_SEXT %and4(s1)
%sext5:_(s8) = G_SEXT %and5(s1)
%sext6:_(s8) = G_SEXT %and6(s1)
%sext7:_(s8) = G_SEXT %and7(s1)
%merge1:_(s32) = G_MERGE_VALUES %sext4, %sext5, %sext6, %sext7
%bv:_(<2 x s32>) = G_BUILD_VECTOR %merge0(s32), %merge1(s32)
%null:_(p1) = G_CONSTANT i64 0
G_STORE %bv(<2 x s32>), %null :: (store (<2 x s32>), addrspace 1)
S_ENDPGM 0
...