forked from OSchip/llvm-project
[AMDGPU] Fix SGPR fixing through SCC chaining
Summary: - During the fixing of SGPR copying from VGPR, ensure users of SCC are properly propagated, i.e. * only propagate through a live def of SCC, * skip the SCC-def inst itself, and * stop the propagation at any other SCC-def inst after checking its SCC use first. Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59362 llvm-svn: 356258
This commit is contained in:
parent
728293ac87
commit
6883d7e192
|
@ -4320,8 +4320,10 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
|
|||
for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) {
|
||||
MachineOperand &Op = Inst.getOperand(i);
|
||||
if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
|
||||
// Only propagate through live-def of SCC.
|
||||
if (Op.isDef() && !Op.isDead())
|
||||
addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
|
||||
Inst.RemoveOperand(i);
|
||||
addSCCDefUsersToVALUWorklist(Inst, Worklist);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5014,19 +5016,23 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
|
|||
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
|
||||
}
|
||||
|
||||
void SIInstrInfo::addSCCDefUsersToVALUWorklist(
|
||||
MachineInstr &SCCDefInst, SetVectorType &Worklist) const {
|
||||
void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op,
|
||||
MachineInstr &SCCDefInst,
|
||||
SetVectorType &Worklist) const {
|
||||
// Ensure that def inst defines SCC, which is still live.
|
||||
assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
|
||||
!Op.isDead() && Op.getParent() == &SCCDefInst);
|
||||
// This assumes that all the users of SCC are in the same block
|
||||
// as the SCC def.
|
||||
for (MachineInstr &MI :
|
||||
make_range(MachineBasicBlock::iterator(SCCDefInst),
|
||||
SCCDefInst.getParent()->end())) {
|
||||
for (MachineInstr &MI : // Skip the def inst itself.
|
||||
make_range(std::next(MachineBasicBlock::iterator(SCCDefInst)),
|
||||
SCCDefInst.getParent()->end())) {
|
||||
// Check if SCC is used first.
|
||||
if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1)
|
||||
Worklist.insert(&MI);
|
||||
// Exit if we find another SCC def.
|
||||
if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1)
|
||||
return;
|
||||
|
||||
if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1)
|
||||
Worklist.insert(&MI);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -120,9 +120,9 @@ private:
|
|||
void addUsersToMoveToVALUWorklist(unsigned Reg, MachineRegisterInfo &MRI,
|
||||
SetVectorType &Worklist) const;
|
||||
|
||||
void
|
||||
addSCCDefUsersToVALUWorklist(MachineInstr &SCCDefInst,
|
||||
SetVectorType &Worklist) const;
|
||||
void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
|
||||
MachineInstr &SCCDefInst,
|
||||
SetVectorType &Worklist) const;
|
||||
|
||||
const TargetRegisterClass *
|
||||
getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
# RUN: llc -march=amdgcn -run-pass=si-fix-sgpr-copies -o - %s | FileCheck --check-prefix=GCN %s
|
||||
|
||||
# GCN-LABEL: name: fix-sgpr-copies
|
||||
# GCN: V_ADD_I32_e32
|
||||
# GCN: V_ADDC_U32_e32
|
||||
---
|
||||
name: fix-sgpr-copies
|
||||
body: |
|
||||
bb.0:
|
||||
%0:vgpr_32 = IMPLICIT_DEF
|
||||
%1:sreg_32 = IMPLICIT_DEF
|
||||
%2:sreg_32 = IMPLICIT_DEF
|
||||
%3:sreg_32 = IMPLICIT_DEF
|
||||
%4:vgpr_32 = V_CVT_U32_F32_e64 0, %0:vgpr_32, 0, 0, implicit $exec
|
||||
%5:sreg_32 = COPY %4:vgpr_32
|
||||
%6:sreg_32 = S_ADD_I32 %2:sreg_32, %5:sreg_32, implicit-def $scc
|
||||
%7:sreg_32 = S_ADDC_U32 %3:sreg_32, %1:sreg_32, implicit-def $scc, implicit $scc
|
||||
...
|
|
@ -168,3 +168,17 @@ define amdgpu_kernel void @test_urem2364(i64 addrspace(1)* %out, i64 %x, i64 %y)
|
|||
store i64 %result, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
;FUNC-LABEL: {{^}}test_udiv_k:
|
||||
;GCN: v_mul{{.+}} v{{[0-9]+}}, v{{[0-9]+}}, 24
|
||||
;GCN: v_mul{{.+}} v{{[0-9]+}}, v{{[0-9]+}}, 24
|
||||
;GCN: v_mul{{.+}} v{{[0-9]+}}, v{{[0-9]+}}, 24
|
||||
;GCN: v_add
|
||||
;GCN: v_addc
|
||||
;GCN: v_addc
|
||||
;GCN: s_endpgm
|
||||
define amdgpu_kernel void @test_udiv_k(i64 addrspace(1)* %out, i64 %x) {
|
||||
%result = udiv i64 24, %x
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue