forked from OSchip/llvm-project
GlobalISel: Fix legalization error where CSE leaves behind dead defs
If the conversion artifact introduced in the unmerge of cast of merge combine already existed in the function, this would introduce dead copies which kept the old casts around, neither of which were deleted, and would fail legalization. This would fail as follows: The G_UNMERGE_VALUES of the G_SEXT of the G_BUILD_VECTOR would introduce a G_SEXT for each of the scalars. Some of the required G_SEXTs already existed in the function, so CSE moves them up in the function and introduces a copy to the original result register. The introduced CSE copies are dead, since the originally G_SEXTs were already directly used. These copies add a use to the illegal G_SEXTs, so they are not deleted. The artifact combiner does not see the defs that need to be updated, since it was hidden inside the CSE builder. I see 2 potential fixes, and opted for the mechanically simpler one, which is to just not insert the cast if the result operand isn't used. Alternatively, we could not insert the cast directly into the result register, and use replaceRegOrBuildCopy similar to the case where there is no conversion. I suspect this is a wider problem in the artifact combiner.
This commit is contained in:
parent
58cb303777
commit
42098c4a30
|
@ -977,10 +977,13 @@ public:
|
|||
Builder.setInstr(MI);
|
||||
|
||||
for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
|
||||
Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();
|
||||
Register DefReg = MI.getOperand(Idx).getReg();
|
||||
Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc});
|
||||
UpdatedDefs.push_back(DefReg);
|
||||
Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();
|
||||
|
||||
if (!MRI.use_empty(DefReg)) {
|
||||
Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc});
|
||||
UpdatedDefs.push_back(DefReg);
|
||||
}
|
||||
}
|
||||
|
||||
markInstAndDefDead(MI, *MergeI, DeadInsts);
|
||||
|
|
|
@ -0,0 +1,114 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck %s
|
||||
|
||||
# The G_UNMERGE_VALUES of the G_SEXT of G_BUILD_VECTOR will introduce
|
||||
# a new G_SEXT for each of the scalars. The sext of %and[4-7] already
|
||||
# exist, so the CSE MIR builder in the artifact combiner would re-use
|
||||
# those instructions and introduce dead copies which were never
|
||||
# deleted, and also kept the illegal %sext[4-7] alive which would fail
|
||||
# legalization.
|
||||
|
||||
---
|
||||
name: artifact_combiner_sext_already_exists
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: artifact_combiner_sext_already_exists
|
||||
; CHECK: %undef:_(p4) = G_IMPLICIT_DEF
|
||||
; CHECK-NEXT: %load:_(s32) = G_LOAD %undef(p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4)
|
||||
; CHECK-NEXT: %unmerge3_0:_(s1) = G_TRUNC %load(s32)
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR %load, [[C]](s32)
|
||||
; CHECK-NEXT: %unmerge3_1:_(s1) = G_TRUNC [[LSHR]](s32)
|
||||
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR %load, [[C1]](s32)
|
||||
; CHECK-NEXT: %unmerge3_2:_(s1) = G_TRUNC [[LSHR1]](s32)
|
||||
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
|
||||
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR %load, [[C2]](s32)
|
||||
; CHECK-NEXT: %unmerge3_3:_(s1) = G_TRUNC [[LSHR2]](s32)
|
||||
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
|
||||
; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR %load, [[C3]](s32)
|
||||
; CHECK-NEXT: %unmerge3_4:_(s1) = G_TRUNC [[LSHR3]](s32)
|
||||
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
|
||||
; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR %load, [[C4]](s32)
|
||||
; CHECK-NEXT: %unmerge3_5:_(s1) = G_TRUNC [[LSHR4]](s32)
|
||||
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
|
||||
; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR %load, [[C5]](s32)
|
||||
; CHECK-NEXT: %unmerge3_6:_(s1) = G_TRUNC [[LSHR5]](s32)
|
||||
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
|
||||
; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR %load, [[C6]](s32)
|
||||
; CHECK-NEXT: %unmerge3_7:_(s1) = G_TRUNC [[LSHR6]](s32)
|
||||
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
|
||||
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
|
||||
; CHECK-NEXT: %negone:_(s1) = G_CONSTANT i1 true
|
||||
; CHECK-NEXT: %and0:_(s1) = G_XOR %unmerge3_0, %negone
|
||||
; CHECK-NEXT: %and1:_(s1) = G_XOR %unmerge3_1, %negone
|
||||
; CHECK-NEXT: %and2:_(s1) = G_XOR %unmerge3_2, %negone
|
||||
; CHECK-NEXT: %and3:_(s1) = G_XOR %unmerge3_3, %negone
|
||||
; CHECK-NEXT: %and4:_(s1) = G_XOR %unmerge3_4, %negone
|
||||
; CHECK-NEXT: %and5:_(s1) = G_XOR %unmerge3_5, %negone
|
||||
; CHECK-NEXT: %and6:_(s1) = G_XOR %unmerge3_6, %negone
|
||||
; CHECK-NEXT: %and7:_(s1) = G_XOR %unmerge3_7, %negone
|
||||
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
|
||||
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT %and0(s1)
|
||||
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C10]]
|
||||
; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT %and1(s1)
|
||||
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT1]], [[C10]]
|
||||
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
|
||||
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
|
||||
; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT %and2(s1)
|
||||
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT2]], [[C10]]
|
||||
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C8]](s32)
|
||||
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
|
||||
; CHECK-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT %and3(s1)
|
||||
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT3]], [[C10]]
|
||||
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32)
|
||||
; CHECK-NEXT: %merge0:_(s32) = G_OR [[OR1]], [[SHL2]]
|
||||
; CHECK-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT %and4(s1)
|
||||
; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT4]], [[C10]]
|
||||
; CHECK-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT %and5(s1)
|
||||
; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT5]], [[C10]]
|
||||
; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
|
||||
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
|
||||
; CHECK-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT %and6(s1)
|
||||
; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SEXT6]], [[C10]]
|
||||
; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C8]](s32)
|
||||
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL4]]
|
||||
; CHECK-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT %and7(s1)
|
||||
; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SEXT7]], [[C10]]
|
||||
; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32)
|
||||
; CHECK-NEXT: %merge1:_(s32) = G_OR [[OR3]], [[SHL5]]
|
||||
; CHECK-NEXT: %bv:_(<2 x s32>) = G_BUILD_VECTOR %merge0(s32), %merge1(s32)
|
||||
; CHECK-NEXT: %null:_(p1) = G_CONSTANT i64 0
|
||||
; CHECK-NEXT: G_STORE %bv(<2 x s32>), %null(p1) :: (store (<2 x s32>), addrspace 1)
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
%undef:_(p4) = G_IMPLICIT_DEF
|
||||
%load:_(s32) = G_LOAD %undef :: (dereferenceable invariant load (s8), align 16, addrspace 4)
|
||||
%trunc:_(s8) = G_TRUNC %load
|
||||
%unmerge3_0:_(s1), %unmerge3_1:_(s1), %unmerge3_2:_(s1), %unmerge3_3:_(s1), %unmerge3_4:_(s1), %unmerge3_5:_(s1), %unmerge3_6:_(s1), %unmerge3_7:_(s1) = G_UNMERGE_VALUES %trunc
|
||||
%negone:_(s1) = G_CONSTANT i1 true
|
||||
%and0:_(s1) = G_XOR %unmerge3_0, %negone
|
||||
%and1:_(s1) = G_XOR %unmerge3_1, %negone
|
||||
%and2:_(s1) = G_XOR %unmerge3_2, %negone
|
||||
%and3:_(s1) = G_XOR %unmerge3_3, %negone
|
||||
%and4:_(s1) = G_XOR %unmerge3_4, %negone
|
||||
%and5:_(s1) = G_XOR %unmerge3_5, %negone
|
||||
%and6:_(s1) = G_XOR %unmerge3_6, %negone
|
||||
%and7:_(s1) = G_XOR %unmerge3_7, %negone
|
||||
%boolvec:_(<8 x s1>) = G_BUILD_VECTOR %and0(s1), %and1(s1), %and2(s1), %and3(s1), %and4(s1), %and5(s1), %and6(s1), %and7(s1)
|
||||
%sext:_(<8 x s8>) = G_SEXT %boolvec(<8 x s1>)
|
||||
%sext_lo:_(<4 x s8>), %sext_hi:_(<4 x s8>) = G_UNMERGE_VALUES %sext(<8 x s8>)
|
||||
%sext0:_(s8), %sext1:_(s8), %sext2:_(s8), %sext3:_(s8) = G_UNMERGE_VALUES %sext_lo(<4 x s8>)
|
||||
%merge0:_(s32) = G_MERGE_VALUES %sext0(s8), %sext1(s8), %sext2(s8), %sext3(s8)
|
||||
%sext4:_(s8) = G_SEXT %and4(s1)
|
||||
%sext5:_(s8) = G_SEXT %and5(s1)
|
||||
%sext6:_(s8) = G_SEXT %and6(s1)
|
||||
%sext7:_(s8) = G_SEXT %and7(s1)
|
||||
%merge1:_(s32) = G_MERGE_VALUES %sext4, %sext5, %sext6, %sext7
|
||||
%bv:_(<2 x s32>) = G_BUILD_VECTOR %merge0(s32), %merge1(s32)
|
||||
%null:_(p1) = G_CONSTANT i64 0
|
||||
G_STORE %bv(<2 x s32>), %null :: (store (<2 x s32>), addrspace 1)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
Loading…
Reference in New Issue