forked from OSchip/llvm-project
AMDGPU: Legalize the operand of SI_INIT_M0
Summary:
This fixes a case where the argument to a sendmsg intrinsic ends up in a VGPR, for whatever reason.

The underlying performance issue is that a multiplication that can be an s_mul_i32 is instead needlessly generated as v_mul_u32_u24, but this is not addressed by this patch.

Change-Id: I61fd4034314d5acdf6074632c30b65364dfa7328

Reviewers: arsenm, rampitec

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D45826

llvm-svn: 330393
This commit is contained in:
parent
793af3b9f0
commit
7a87977fb2
|
@@ -3294,6 +3294,13 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
|
||||||
unsigned DstReg = MRI.createVirtualRegister(SRC);
|
unsigned DstReg = MRI.createVirtualRegister(SRC);
|
||||||
unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
|
unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
|
||||||
|
|
||||||
|
if (SubRegs == 1) {
|
||||||
|
BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
|
||||||
|
get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
|
||||||
|
.addReg(SrcReg);
|
||||||
|
return DstReg;
|
||||||
|
}
|
||||||
|
|
||||||
SmallVector<unsigned, 8> SRegs;
|
SmallVector<unsigned, 8> SRegs;
|
||||||
for (unsigned i = 0; i < SubRegs; ++i) {
|
for (unsigned i = 0; i < SubRegs; ++i) {
|
||||||
unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
|
unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
|
||||||
|
@@ -3471,6 +3478,14 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Legalize SI_INIT_M0
|
||||||
|
if (MI.getOpcode() == AMDGPU::SI_INIT_M0) {
|
||||||
|
MachineOperand &Src = MI.getOperand(0);
|
||||||
|
if (Src.isReg() && RI.hasVGPRs(MRI.getRegClass(Src.getReg())))
|
||||||
|
Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Legalize MIMG and MUBUF/MTBUF for shaders.
|
// Legalize MIMG and MUBUF/MTBUF for shaders.
|
||||||
//
|
//
|
||||||
// Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
|
// Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
|
||||||
|
|
|
@@ -121,6 +121,21 @@ body:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; TODO: This should use s_mul_i32 instead of v_mul_u32_u24 + v_readfirstlane!
|
||||||
|
;
|
||||||
|
; GCN-LABEL: {{^}}test_mul24:
|
||||||
|
; GCN: v_mul_u32_u24_e32
|
||||||
|
; GCN: v_readfirstlane_b32
|
||||||
|
; GCN: s_mov_b32 m0,
|
||||||
|
; GCN: s_sendmsg sendmsg(MSG_INTERRUPT)
|
||||||
|
define amdgpu_gs void @test_mul24(i32 inreg %arg) {
|
||||||
|
body:
|
||||||
|
%tmp1 = and i32 %arg, 511
|
||||||
|
%tmp2 = mul nuw nsw i32 %tmp1, 12288
|
||||||
|
call void @llvm.amdgcn.s.sendmsg(i32 1, i32 %tmp2)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0
|
declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0
|
||||||
declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) #0
|
declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) #0
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue