AMDGPU: Select basic interp directly from intrinsics

llvm-svn: 375457
This commit is contained in:
Matt Arsenault 2019-10-21 21:49:44 +00:00
parent 8c6913a07b
commit ef9a0278f0
5 changed files with 29 additions and 57 deletions

View File

@ -563,15 +563,26 @@ static bool hasSourceMods(const SDNode *N) {
case ISD::FREM:
case ISD::INLINEASM:
case ISD::INLINEASM_BR:
case AMDGPUISD::INTERP_P1:
case AMDGPUISD::INTERP_P2:
case AMDGPUISD::DIV_SCALE:
case ISD::INTRINSIC_W_CHAIN:
// TODO: Should really be looking at the users of the bitcast. These are
// problematic because bitcasts are used to legalize all stores to integer
// types.
case ISD::BITCAST:
return false;
case ISD::INTRINSIC_WO_CHAIN: {
switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) {
case Intrinsic::amdgcn_interp_p1:
case Intrinsic::amdgcn_interp_p2:
case Intrinsic::amdgcn_interp_mov:
case Intrinsic::amdgcn_interp_p1_f16:
case Intrinsic::amdgcn_interp_p2_f16:
return false;
default:
return true;
}
}
default:
return true;
}
@ -4283,9 +4294,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(KILL)
NODE_NAME_CASE(DUMMY_CHAIN)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
NODE_NAME_CASE(INTERP_MOV)
NODE_NAME_CASE(INTERP_P1)
NODE_NAME_CASE(INTERP_P2)
NODE_NAME_CASE(INTERP_P1LL_F16)
NODE_NAME_CASE(INTERP_P1LV_F16)
NODE_NAME_CASE(INTERP_P2_F16)

View File

@ -476,9 +476,6 @@ enum NodeType : unsigned {
BUILD_VERTICAL_VECTOR,
/// Pointer to the start of the shader's constant data.
CONST_DATA_PTR,
INTERP_MOV,
INTERP_P1,
INTERP_P2,
INTERP_P1LL_F16,
INTERP_P1LV_F16,
INTERP_P2_F16,

View File

@ -329,18 +329,6 @@ def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2",
def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
SDTypeProfile<1, 3, [SDTCisFP<0>]>,
[SDNPInGlue]>;
def AMDGPUinterp_p1 : SDNode<"AMDGPUISD::INTERP_P1",
SDTypeProfile<1, 3, [SDTCisFP<0>]>,
[SDNPInGlue, SDNPOutGlue]>;
def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2",
SDTypeProfile<1, 4, [SDTCisFP<0>]>,
[SDNPInGlue]>;
def AMDGPUinterp_p1ll_f16 : SDNode<"AMDGPUISD::INTERP_P1LL_F16",
SDTypeProfile<1, 7, [SDTCisFP<0>]>,
[SDNPInGlue, SDNPOutGlue]>;

View File

@ -5876,36 +5876,21 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::amdgcn_fdiv_fast:
return lowerFDIV_FAST(Op, DAG);
case Intrinsic::amdgcn_interp_mov: {
SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
Op.getOperand(4), SDValue());
return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3), ToM0.getValue(1));
}
case Intrinsic::amdgcn_interp_p1: {
SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
Op.getOperand(4), SDValue());
return DAG.getNode(AMDGPUISD::INTERP_P1, DL, MVT::f32, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3), ToM0.getValue(1));
}
case Intrinsic::amdgcn_interp_p2: {
SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
Op.getOperand(5), SDValue());
return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
ToM0.getValue(1));
}
case Intrinsic::amdgcn_interp_p1_f16: {
SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
Op.getOperand(5), SDValue());
if (getSubtarget()->getLDSBankCount() == 16) {
// 16 bank LDS
SDValue S = DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
DAG.getConstant(2, DL, MVT::i32), // P0
Op.getOperand(2), // Attrchan
Op.getOperand(3), // Attr
ToM0.getValue(1));
// FIXME: This implicitly will insert a second CopyToReg to M0.
SDValue S = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::f32,
DAG.getTargetConstant(Intrinsic::amdgcn_interp_mov, DL, MVT::i32),
DAG.getConstant(2, DL, MVT::i32), // P0
Op.getOperand(2), // Attrchan
Op.getOperand(3), // Attr
Op.getOperand(5)); // m0
SDValue Ops[] = {
Op.getOperand(1), // Src0
Op.getOperand(2), // Attrchan
@ -10895,12 +10880,6 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
case ISD::INTRINSIC_W_CHAIN:
return AMDGPU::isIntrinsicSourceOfDivergence(
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
// In some cases intrinsics that are a source of divergence have been
// lowered to AMDGPUISD so we also need to check those too.
case AMDGPUISD::INTERP_MOV:
case AMDGPUISD::INTERP_P1:
case AMDGPUISD::INTERP_P2:
return true;
}
return false;
}

View File

@ -43,8 +43,8 @@ multiclass V_INTERP_P1_F32_m : VINTRP_m <
(outs VINTRPDst:$vdst),
(ins VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
"v_interp_p1_f32$vdst, $vsrc, $attr$attrchan",
[(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 timm:$attrchan),
(i32 timm:$attr)))]
[(set f32:$vdst, (int_amdgcn_interp_p1 f32:$vsrc,
(i32 timm:$attrchan), (i32 timm:$attr), M0))]
>;
let OtherPredicates = [has32BankLDS] in {
@ -66,8 +66,8 @@ defm V_INTERP_P2_F32 : VINTRP_m <
(outs VINTRPDst:$vdst),
(ins VGPR_32:$src0, VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
"v_interp_p2_f32$vdst, $vsrc, $attr$attrchan",
[(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 timm:$attrchan),
(i32 timm:$attr)))]>;
[(set f32:$vdst, (int_amdgcn_interp_p2 f32:$src0, f32:$vsrc,
(i32 timm:$attrchan), (i32 timm:$attr), M0))]>;
} // End DisableEncoding = "$src0", Constraints = "$src0 = $vdst"
@ -76,8 +76,8 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
(outs VINTRPDst:$vdst),
(ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan),
"v_interp_mov_f32$vdst, $vsrc, $attr$attrchan",
[(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 timm:$attrchan),
(i32 timm:$attr)))]>;
[(set f32:$vdst, (int_amdgcn_interp_mov (i32 imm:$vsrc),
(i32 timm:$attrchan), (i32 timm:$attr), M0))]>;
} // End Uses = [M0, EXEC]