forked from OSchip/llvm-project
AMDGPU: Select basic interp directly from intrinsics
llvm-svn: 375457
This commit is contained in:
parent
8c6913a07b
commit
ef9a0278f0
|
@ -563,15 +563,26 @@ static bool hasSourceMods(const SDNode *N) {
|
|||
case ISD::FREM:
|
||||
case ISD::INLINEASM:
|
||||
case ISD::INLINEASM_BR:
|
||||
case AMDGPUISD::INTERP_P1:
|
||||
case AMDGPUISD::INTERP_P2:
|
||||
case AMDGPUISD::DIV_SCALE:
|
||||
case ISD::INTRINSIC_W_CHAIN:
|
||||
|
||||
// TODO: Should really be looking at the users of the bitcast. These are
|
||||
// problematic because bitcasts are used to legalize all stores to integer
|
||||
// types.
|
||||
case ISD::BITCAST:
|
||||
return false;
|
||||
case ISD::INTRINSIC_WO_CHAIN: {
|
||||
switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) {
|
||||
case Intrinsic::amdgcn_interp_p1:
|
||||
case Intrinsic::amdgcn_interp_p2:
|
||||
case Intrinsic::amdgcn_interp_mov:
|
||||
case Intrinsic::amdgcn_interp_p1_f16:
|
||||
case Intrinsic::amdgcn_interp_p2_f16:
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
|
@ -4283,9 +4294,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
NODE_NAME_CASE(KILL)
|
||||
NODE_NAME_CASE(DUMMY_CHAIN)
|
||||
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
|
||||
NODE_NAME_CASE(INTERP_MOV)
|
||||
NODE_NAME_CASE(INTERP_P1)
|
||||
NODE_NAME_CASE(INTERP_P2)
|
||||
NODE_NAME_CASE(INTERP_P1LL_F16)
|
||||
NODE_NAME_CASE(INTERP_P1LV_F16)
|
||||
NODE_NAME_CASE(INTERP_P2_F16)
|
||||
|
|
|
@ -476,9 +476,6 @@ enum NodeType : unsigned {
|
|||
BUILD_VERTICAL_VECTOR,
|
||||
/// Pointer to the start of the shader's constant data.
|
||||
CONST_DATA_PTR,
|
||||
INTERP_MOV,
|
||||
INTERP_P1,
|
||||
INTERP_P2,
|
||||
INTERP_P1LL_F16,
|
||||
INTERP_P1LV_F16,
|
||||
INTERP_P2_F16,
|
||||
|
|
|
@ -329,18 +329,6 @@ def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2",
|
|||
|
||||
// SelectionDAG node record for the AMDGPUISD::PERM opcode. Its operand/result
// types come from AMDGPUDTIntTernaryOp (defined outside this view; presumably
// an integer three-operand profile -- confirm), and the empty list means no
// node properties are declared.
def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
|
||||
|
||||
// SelectionDAG node record for the AMDGPUISD::INTERP_MOV opcode.
// Type profile: 1 result, 3 operands; the result (index 0) must be a
// floating-point type. SDNPInGlue: the node accepts an incoming glue operand.
def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
|
||||
SDTypeProfile<1, 3, [SDTCisFP<0>]>,
|
||||
[SDNPInGlue]>;
|
||||
|
||||
// SelectionDAG node record for the AMDGPUISD::INTERP_P1 opcode.
// Type profile: 1 result, 3 operands; the result (index 0) must be a
// floating-point type. SDNPInGlue/SDNPOutGlue: the node both accepts an
// incoming glue operand and produces an outgoing glue value.
def AMDGPUinterp_p1 : SDNode<"AMDGPUISD::INTERP_P1",
|
||||
SDTypeProfile<1, 3, [SDTCisFP<0>]>,
|
||||
[SDNPInGlue, SDNPOutGlue]>;
|
||||
|
||||
// SelectionDAG node record for the AMDGPUISD::INTERP_P2 opcode.
// Type profile: 1 result, 4 operands (one more than INTERP_P1); the result
// (index 0) must be a floating-point type. SDNPInGlue: the node accepts an
// incoming glue operand.
def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2",
|
||||
SDTypeProfile<1, 4, [SDTCisFP<0>]>,
|
||||
[SDNPInGlue]>;
|
||||
|
||||
// SelectionDAG node record for the AMDGPUISD::INTERP_P1LL_F16 opcode.
// Type profile: 1 result, 7 operands; the result (index 0) must be a
// floating-point type. SDNPInGlue/SDNPOutGlue: the node both accepts an
// incoming glue operand and produces an outgoing glue value.
def AMDGPUinterp_p1ll_f16 : SDNode<"AMDGPUISD::INTERP_P1LL_F16",
|
||||
SDTypeProfile<1, 7, [SDTCisFP<0>]>,
|
||||
[SDNPInGlue, SDNPOutGlue]>;
|
||||
|
|
|
@ -5876,36 +5876,21 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
}
|
||||
case Intrinsic::amdgcn_fdiv_fast:
|
||||
return lowerFDIV_FAST(Op, DAG);
|
||||
case Intrinsic::amdgcn_interp_mov: {
|
||||
SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
|
||||
Op.getOperand(4), SDValue());
|
||||
return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32, Op.getOperand(1),
|
||||
Op.getOperand(2), Op.getOperand(3), ToM0.getValue(1));
|
||||
}
|
||||
case Intrinsic::amdgcn_interp_p1: {
|
||||
SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
|
||||
Op.getOperand(4), SDValue());
|
||||
return DAG.getNode(AMDGPUISD::INTERP_P1, DL, MVT::f32, Op.getOperand(1),
|
||||
Op.getOperand(2), Op.getOperand(3), ToM0.getValue(1));
|
||||
}
|
||||
case Intrinsic::amdgcn_interp_p2: {
|
||||
SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
|
||||
Op.getOperand(5), SDValue());
|
||||
return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, Op.getOperand(1),
|
||||
Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
|
||||
ToM0.getValue(1));
|
||||
}
|
||||
case Intrinsic::amdgcn_interp_p1_f16: {
|
||||
SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
|
||||
Op.getOperand(5), SDValue());
|
||||
|
||||
if (getSubtarget()->getLDSBankCount() == 16) {
|
||||
// 16 bank LDS
|
||||
SDValue S = DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
|
||||
DAG.getConstant(2, DL, MVT::i32), // P0
|
||||
Op.getOperand(2), // Attrchan
|
||||
Op.getOperand(3), // Attr
|
||||
ToM0.getValue(1));
|
||||
|
||||
// FIXME: This will implicitly insert a second CopyToReg to M0.
|
||||
SDValue S = DAG.getNode(
|
||||
ISD::INTRINSIC_WO_CHAIN, DL, MVT::f32,
|
||||
DAG.getTargetConstant(Intrinsic::amdgcn_interp_mov, DL, MVT::i32),
|
||||
DAG.getConstant(2, DL, MVT::i32), // P0
|
||||
Op.getOperand(2), // Attrchan
|
||||
Op.getOperand(3), // Attr
|
||||
Op.getOperand(5)); // m0
|
||||
|
||||
SDValue Ops[] = {
|
||||
Op.getOperand(1), // Src0
|
||||
Op.getOperand(2), // Attrchan
|
||||
|
@ -10895,12 +10880,6 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
|
|||
case ISD::INTRINSIC_W_CHAIN:
|
||||
return AMDGPU::isIntrinsicSourceOfDivergence(
|
||||
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
|
||||
// In some cases intrinsics that are a source of divergence have been
|
||||
// lowered to AMDGPUISD nodes, so we need to check those too.
|
||||
case AMDGPUISD::INTERP_MOV:
|
||||
case AMDGPUISD::INTERP_P1:
|
||||
case AMDGPUISD::INTERP_P2:
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -43,8 +43,8 @@ multiclass V_INTERP_P1_F32_m : VINTRP_m <
|
|||
(outs VINTRPDst:$vdst),
|
||||
(ins VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
|
||||
"v_interp_p1_f32$vdst, $vsrc, $attr$attrchan",
|
||||
[(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 timm:$attrchan),
|
||||
(i32 timm:$attr)))]
|
||||
[(set f32:$vdst, (int_amdgcn_interp_p1 f32:$vsrc,
|
||||
(i32 timm:$attrchan), (i32 timm:$attr), M0))]
|
||||
>;
|
||||
|
||||
let OtherPredicates = [has32BankLDS] in {
|
||||
|
@ -66,8 +66,8 @@ defm V_INTERP_P2_F32 : VINTRP_m <
|
|||
(outs VINTRPDst:$vdst),
|
||||
(ins VGPR_32:$src0, VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
|
||||
"v_interp_p2_f32$vdst, $vsrc, $attr$attrchan",
|
||||
[(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 timm:$attrchan),
|
||||
(i32 timm:$attr)))]>;
|
||||
[(set f32:$vdst, (int_amdgcn_interp_p2 f32:$src0, f32:$vsrc,
|
||||
(i32 timm:$attrchan), (i32 timm:$attr), M0))]>;
|
||||
|
||||
} // End DisableEncoding = "$src0", Constraints = "$src0 = $vdst"
|
||||
|
||||
|
@ -76,8 +76,8 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
|
|||
(outs VINTRPDst:$vdst),
|
||||
(ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan),
|
||||
"v_interp_mov_f32$vdst, $vsrc, $attr$attrchan",
|
||||
[(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 timm:$attrchan),
|
||||
(i32 timm:$attr)))]>;
|
||||
[(set f32:$vdst, (int_amdgcn_interp_mov (i32 imm:$vsrc),
|
||||
(i32 timm:$attrchan), (i32 timm:$attr), M0))]>;
|
||||
|
||||
} // End Uses = [M0, EXEC]
|
||||
|
||||
|
|
Loading…
Reference in New Issue