From 6883d7e192fc81ba0ca1b9b4e55d54836b2712a4 Mon Sep 17 00:00:00 2001 From: Michael Liao <michael.hliao@gmail.com> Date: Fri, 15 Mar 2019 12:42:21 +0000 Subject: [PATCH] [AMDGPU] Fix SGPR fixing through SCC chaining Summary: - During the fixing of SGPR copying from VGPR, ensure users of SCC are properly propagated, i.e. * only propagate through live def of SCC, * skip the SCC-def inst itself, and * stop the propagation at the next SCC-def inst after checking its SCC-use first. Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59362 llvm-svn: 356258 --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 24 ++++++++++++-------- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 6 ++--- llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir | 18 +++++++++++++++ llvm/test/CodeGen/AMDGPU/udivrem64.ll | 14 ++++++++++++ 4 files changed, 50 insertions(+), 12 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index ec6779f46c66..0d90309c3d38 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4320,8 +4320,10 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst, for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) { MachineOperand &Op = Inst.getOperand(i); if (Op.isReg() && Op.getReg() == AMDGPU::SCC) { + // Only propagate through live-def of SCC. 
+ if (Op.isDef() && !Op.isDead()) + addSCCDefUsersToVALUWorklist(Op, Inst, Worklist); Inst.RemoveOperand(i); - addSCCDefUsersToVALUWorklist(Inst, Worklist); } } @@ -5014,19 +5016,23 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist, addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } -void SIInstrInfo::addSCCDefUsersToVALUWorklist( - MachineInstr &SCCDefInst, SetVectorType &Worklist) const { +void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op, + MachineInstr &SCCDefInst, + SetVectorType &Worklist) const { + // Ensure that def inst defines SCC, which is still live. + assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() && + !Op.isDead() && Op.getParent() == &SCCDefInst); // This assumes that all the users of SCC are in the same block // as the SCC def. - for (MachineInstr &MI : - make_range(MachineBasicBlock::iterator(SCCDefInst), - SCCDefInst.getParent()->end())) { + for (MachineInstr &MI : // Skip the def inst itself. + make_range(std::next(MachineBasicBlock::iterator(SCCDefInst)), + SCCDefInst.getParent()->end())) { + // Check if SCC is used first. + if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1) + Worklist.insert(&MI); // Exit if we find another SCC def. 
if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1) return; - - if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1) - Worklist.insert(&MI); } } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index b96c40def004..51b5df93fefb 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -120,9 +120,9 @@ private: void addUsersToMoveToVALUWorklist(unsigned Reg, MachineRegisterInfo &MRI, SetVectorType &Worklist) const; - void - addSCCDefUsersToVALUWorklist(MachineInstr &SCCDefInst, - SetVectorType &Worklist) const; + void addSCCDefUsersToVALUWorklist(MachineOperand &Op, + MachineInstr &SCCDefInst, + SetVectorType &Worklist) const; const TargetRegisterClass * getDestEquivalentVGPRClass(const MachineInstr &Inst) const; diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir new file mode 100644 index 000000000000..3d6e05cb2c9b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir @@ -0,0 +1,18 @@ +# RUN: llc -march=amdgcn -run-pass=si-fix-sgpr-copies -o - %s | FileCheck --check-prefix=GCN %s + +# GCN-LABEL: name: fix-sgpr-copies +# GCN: V_ADD_I32_e32 +# GCN: V_ADDC_U32_e32 +--- +name: fix-sgpr-copies +body: | + bb.0: + %0:vgpr_32 = IMPLICIT_DEF + %1:sreg_32 = IMPLICIT_DEF + %2:sreg_32 = IMPLICIT_DEF + %3:sreg_32 = IMPLICIT_DEF + %4:vgpr_32 = V_CVT_U32_F32_e64 0, %0:vgpr_32, 0, 0, implicit $exec + %5:sreg_32 = COPY %4:vgpr_32 + %6:sreg_32 = S_ADD_I32 %2:sreg_32, %5:sreg_32, implicit-def $scc + %7:sreg_32 = S_ADDC_U32 %3:sreg_32, %1:sreg_32, implicit-def $scc, implicit $scc +... 
diff --git a/llvm/test/CodeGen/AMDGPU/udivrem64.ll b/llvm/test/CodeGen/AMDGPU/udivrem64.ll index 408fd01c2922..32a758b7c7d8 100644 --- a/llvm/test/CodeGen/AMDGPU/udivrem64.ll +++ b/llvm/test/CodeGen/AMDGPU/udivrem64.ll @@ -168,3 +168,17 @@ define amdgpu_kernel void @test_urem2364(i64 addrspace(1)* %out, i64 %x, i64 %y) store i64 %result, i64 addrspace(1)* %out ret void } + +;FUNC-LABEL: {{^}}test_udiv_k: +;GCN: v_mul{{.+}} v{{[0-9]+}}, v{{[0-9]+}}, 24 +;GCN: v_mul{{.+}} v{{[0-9]+}}, v{{[0-9]+}}, 24 +;GCN: v_mul{{.+}} v{{[0-9]+}}, v{{[0-9]+}}, 24 +;GCN: v_add +;GCN: v_addc +;GCN: v_addc +;GCN: s_endpgm +define amdgpu_kernel void @test_udiv_k(i64 addrspace(1)* %out, i64 %x) { + %result = udiv i64 24, %x + store i64 %result, i64 addrspace(1)* %out + ret void +}