From aff2995f46ec2a38dffcdb3ad5a9cd02197ca7f9 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 1 Aug 2019 18:27:11 +0000 Subject: [PATCH] AMDGPU: Use tablegen pattern for sendmsg intrinsics Since this now emits a direct copy to m0, SIFixSGPRCopies has to handle a physical register. llvm-svn: 367593 --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 6 ++++-- llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 24 ++++++++++++++++++---- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 9 -------- llvm/lib/Target/AMDGPU/SOPInstructions.td | 7 +++---- 4 files changed, 27 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index c834ed4cf686..071e1b6bec1e 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -199,9 +199,11 @@ def int_amdgcn_wavefrontsize : // The first parameter is s_sendmsg immediate (i16), // the second one is copied to m0 def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">, - Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>; + Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], + [ImmArg<0>, IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">, - Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>; + Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], + [ImmArg<0>, IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">, Intrinsic<[], [], [IntrConvergent]>; diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 861fe747c664..05aee164cb55 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -619,13 +619,29 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { case AMDGPU::WQM: case AMDGPU::SOFT_WQM: case AMDGPU::WWM: { - // If the destination register is a physical register there isn't really - // much we can do to fix this. - if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) - continue; + Register DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *SrcRC, *DstRC; std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI); + + if (!TargetRegisterInfo::isVirtualRegister(DstReg)) { + // If the destination register is a physical register there isn't + // really much we can do to fix this. + // Some special instructions use M0 as an input. Some even only use + // the first lane. Insert a readfirstlane and hope for the best. + if (DstReg == AMDGPU::M0 && TRI->hasVectorRegisters(SrcRC)) { + Register TmpReg + = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + + BuildMI(MBB, MI, MI.getDebugLoc(), + TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg) + .add(MI.getOperand(1)); + MI.getOperand(1).setReg(TmpReg); + } + + continue; + } + if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) { unsigned SrcReg = MI.getOperand(1).getReg(); if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index fa0dc7787e8c..bee25c9d1844 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6735,15 +6735,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE; return DAG.getNode(Opc, DL, Op->getVTList(), Ops); } - case Intrinsic::amdgcn_s_sendmsg: - case Intrinsic::amdgcn_s_sendmsghalt: { - unsigned NodeOp = (IntrinsicID == Intrinsic::amdgcn_s_sendmsg) ? - AMDGPUISD::SENDMSG : AMDGPUISD::SENDMSGHALT; - Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3)); - SDValue Glue = Chain.getValue(1); - return DAG.getNode(NodeOp, DL, MVT::Other, Chain, - Op.getOperand(2), Glue); - } case Intrinsic::amdgcn_init_exec: { return DAG.getNode(AMDGPUISD::INIT_EXEC, DL, MVT::Other, Chain, Op.getOperand(2)); diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 1a57509f7c2f..58b0c4beca2c 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -1110,12 +1110,11 @@ def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">; let Uses = [EXEC, M0] in { // FIXME: Should this be mayLoad+mayStore? def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16", - [(AMDGPUsendmsg (i32 imm:$simm16))] ->; + [(int_amdgcn_s_sendmsg (i32 imm:$simm16), M0)]>; def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16", - [(AMDGPUsendmsghalt (i32 imm:$simm16))] ->; + [(int_amdgcn_s_sendmsghalt (i32 imm:$simm16), M0)]>; + } // End Uses = [EXEC, M0] def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16"> {