forked from OSchip/llvm-project
AMDGPU: Use tablegen pattern for sendmsg intrinsics
Since this now emits a direct copy to m0, SIFixSGPRCopies has to handle a physical register.

llvm-svn: 367593
This commit is contained in:
parent
20b198ec5e
commit
aff2995f46
|
@@ -199,9 +199,11 @@ def int_amdgcn_wavefrontsize :
|
|||
// The first parameter is s_sendmsg immediate (i16),
|
||||
// the second one is copied to m0
|
||||
def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
|
||||
Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>;
|
||||
Intrinsic <[], [llvm_i32_ty, llvm_i32_ty],
|
||||
[ImmArg<0>, IntrNoMem, IntrHasSideEffects]>;
|
||||
def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
|
||||
Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>;
|
||||
Intrinsic <[], [llvm_i32_ty, llvm_i32_ty],
|
||||
[ImmArg<0>, IntrNoMem, IntrHasSideEffects]>;
|
||||
|
||||
def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
|
||||
Intrinsic<[], [], [IntrConvergent]>;
|
||||
|
|
|
@@ -619,13 +619,29 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
|
|||
case AMDGPU::WQM:
|
||||
case AMDGPU::SOFT_WQM:
|
||||
case AMDGPU::WWM: {
|
||||
// If the destination register is a physical register there isn't really
|
||||
// much we can do to fix this.
|
||||
if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
|
||||
continue;
|
||||
Register DstReg = MI.getOperand(0).getReg();
|
||||
|
||||
const TargetRegisterClass *SrcRC, *DstRC;
|
||||
std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
|
||||
|
||||
if (!TargetRegisterInfo::isVirtualRegister(DstReg)) {
|
||||
// If the destination register is a physical register there isn't
|
||||
// really much we can do to fix this.
|
||||
// Some special instructions use M0 as an input. Some even only use
|
||||
// the first lane. Insert a readfirstlane and hope for the best.
|
||||
if (DstReg == AMDGPU::M0 && TRI->hasVectorRegisters(SrcRC)) {
|
||||
Register TmpReg
|
||||
= MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
|
||||
|
||||
BuildMI(MBB, MI, MI.getDebugLoc(),
|
||||
TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
|
||||
.add(MI.getOperand(1));
|
||||
MI.getOperand(1).setReg(TmpReg);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
|
||||
unsigned SrcReg = MI.getOperand(1).getReg();
|
||||
if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
|
||||
|
|
|
@@ -6735,15 +6735,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
|||
AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
|
||||
return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
|
||||
}
|
||||
case Intrinsic::amdgcn_s_sendmsg:
|
||||
case Intrinsic::amdgcn_s_sendmsghalt: {
|
||||
unsigned NodeOp = (IntrinsicID == Intrinsic::amdgcn_s_sendmsg) ?
|
||||
AMDGPUISD::SENDMSG : AMDGPUISD::SENDMSGHALT;
|
||||
Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
|
||||
SDValue Glue = Chain.getValue(1);
|
||||
return DAG.getNode(NodeOp, DL, MVT::Other, Chain,
|
||||
Op.getOperand(2), Glue);
|
||||
}
|
||||
case Intrinsic::amdgcn_init_exec: {
|
||||
return DAG.getNode(AMDGPUISD::INIT_EXEC, DL, MVT::Other, Chain,
|
||||
Op.getOperand(2));
|
||||
|
|
|
@@ -1110,12 +1110,11 @@ def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
|
|||
let Uses = [EXEC, M0] in {
|
||||
// FIXME: Should this be mayLoad+mayStore?
|
||||
def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
|
||||
[(AMDGPUsendmsg (i32 imm:$simm16))]
|
||||
>;
|
||||
[(int_amdgcn_s_sendmsg (i32 imm:$simm16), M0)]>;
|
||||
|
||||
def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16",
|
||||
[(AMDGPUsendmsghalt (i32 imm:$simm16))]
|
||||
>;
|
||||
[(int_amdgcn_s_sendmsghalt (i32 imm:$simm16), M0)]>;
|
||||
|
||||
} // End Uses = [EXEC, M0]
|
||||
|
||||
def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16"> {
|
||||
|
|
Loading…
Reference in New Issue