forked from OSchip/llvm-project
R600/SI: Use MULADD_IEEE/V_MAD_F32 instruction for mad pattern
llvm-svn: 175446
This commit is contained in:
parent
685018009b
commit
1ce13f553e
|
@ -127,9 +127,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
return LowerIntrinsicLRP(Op, DAG);
|
||||
case AMDGPUIntrinsic::AMDIL_fraction:
|
||||
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
|
||||
case AMDGPUIntrinsic::AMDIL_mad:
|
||||
return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
|
||||
Op.getOperand(2), Op.getOperand(3));
|
||||
case AMDGPUIntrinsic::AMDIL_max:
|
||||
return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
|
||||
Op.getOperand(2));
|
||||
|
@ -176,9 +173,9 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
|
|||
Op.getOperand(1));
|
||||
SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
|
||||
Op.getOperand(3));
|
||||
return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
|
||||
Op.getOperand(2),
|
||||
OneSubAC);
|
||||
return DAG.getNode(ISD::FADD, DL, VT,
|
||||
DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
|
||||
OneSubAC);
|
||||
}
|
||||
|
||||
/// \brief Generate Min/Max node
|
||||
|
@ -393,7 +390,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
switch (Opcode) {
|
||||
default: return 0;
|
||||
// AMDIL DAG nodes
|
||||
NODE_NAME_CASE(MAD);
|
||||
NODE_NAME_CASE(CALL);
|
||||
NODE_NAME_CASE(UMUL);
|
||||
NODE_NAME_CASE(DIV_INF);
|
||||
|
|
|
@ -108,7 +108,6 @@ namespace AMDGPUISD {
|
|||
enum {
|
||||
// AMDIL ISD Opcodes
|
||||
FIRST_NUMBER = ISD::BUILTIN_OP_END,
|
||||
MAD, // 32bit Fused Multiply Add instruction
|
||||
CALL, // Function call based on a single integer
|
||||
UMUL, // 32bit unsigned multiplication
|
||||
DIV_INF, // Divide with infinity returned on zero divisor
|
||||
|
|
|
@ -451,7 +451,8 @@ AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
|
|||
SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
|
||||
|
||||
// float fr = mad(fqneg, fb, fa);
|
||||
SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
|
||||
SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
|
||||
DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
|
||||
|
||||
// int iq = (int)fq;
|
||||
SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
|
||||
|
|
|
@ -116,7 +116,6 @@ def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
|
|||
//===--------------------------------------------------------------------===//
|
||||
// Floating point math functions
|
||||
def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>;
|
||||
def IL_mad : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Integer functions
|
||||
|
|
|
@ -92,12 +92,6 @@ let TargetPrefix = "AMDIL", isTarget = 1 in {
|
|||
TernaryIntInt;
|
||||
def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
|
||||
BinaryIntInt;
|
||||
def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">,
|
||||
TernaryIntInt;
|
||||
def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">,
|
||||
TernaryIntInt;
|
||||
def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">,
|
||||
TernaryIntFloat;
|
||||
def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
|
||||
BinaryIntInt;
|
||||
def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
|
||||
|
@ -110,10 +104,6 @@ let TargetPrefix = "AMDIL", isTarget = 1 in {
|
|||
BinaryIntInt;
|
||||
def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
|
||||
BinaryIntInt;
|
||||
def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">,
|
||||
TernaryIntInt;
|
||||
def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">,
|
||||
TernaryIntInt;
|
||||
def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
|
||||
BinaryIntInt;
|
||||
def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
|
||||
|
|
|
@ -949,8 +949,13 @@ class MUL_LIT_Common <bits<5> inst> : R600_3OP <
|
|||
|
||||
class MULADD_Common <bits<5> inst> : R600_3OP <
|
||||
inst, "MULADD",
|
||||
[]
|
||||
>;
|
||||
|
||||
class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
|
||||
inst, "MULADD_IEEE",
|
||||
[(set (f32 R600_Reg32:$dst),
|
||||
(IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))]
|
||||
(fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))]
|
||||
>;
|
||||
|
||||
class CNDE_Common <bits<5> inst> : R600_3OP <
|
||||
|
@ -1107,6 +1112,7 @@ let Predicates = [isR600] in {
|
|||
|
||||
def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
|
||||
def MULADD_r600 : MULADD_Common<0x10>;
|
||||
def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>;
|
||||
def CNDE_r600 : CNDE_Common<0x18>;
|
||||
def CNDGT_r600 : CNDGT_Common<0x19>;
|
||||
def CNDGE_r600 : CNDGE_Common<0x1A>;
|
||||
|
@ -1246,6 +1252,7 @@ let Predicates = [isEGorCayman] in {
|
|||
>;
|
||||
|
||||
def MULADD_eg : MULADD_Common<0x14>;
|
||||
def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
|
||||
def ASHR_eg : ASHR_Common<0x15>;
|
||||
def LSHR_eg : LSHR_Common<0x16>;
|
||||
def LSHL_eg : LSHL_Common<0x17>;
|
||||
|
|
|
@ -1416,8 +1416,8 @@ def : Pat <
|
|||
/********** VOP3 Patterns **********/
|
||||
/********** ================== **********/
|
||||
|
||||
def : Pat <(f32 (IL_mad VSrc_32:$src0, VReg_32:$src1, VReg_32:$src2)),
|
||||
(V_MAD_LEGACY_F32 VSrc_32:$src0, VReg_32:$src1, VReg_32:$src2,
|
||||
def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VReg_32:$src1), VReg_32:$src2)),
|
||||
(V_MAD_F32 VSrc_32:$src0, VReg_32:$src1, VReg_32:$src2,
|
||||
0, 0, 0, 0)>;
|
||||
|
||||
/********** ================== **********/
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||
|
||||
;CHECK: MULADD_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
|
||||
; Reads three float inputs via llvm.R600.load.input (indices 0, 1 and 2),
; multiplies the first two with fmul, adds the third with fadd, and writes the
; sum to output 0 via llvm.AMDGPU.store.output. The FileCheck directive near
; the top of this file expects a MULADD_IEEE instruction in the llc output.
define void @test() {
|
||||
%r0 = call float @llvm.R600.load.input(i32 0)
|
||||
%r1 = call float @llvm.R600.load.input(i32 1)
|
||||
%r2 = call float @llvm.R600.load.input(i32 2)
|
||||
%r3 = fmul float %r0, %r1
|
||||
%r4 = fadd float %r3, %r2
|
||||
call void @llvm.AMDGPU.store.output(float %r4, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.R600.load.input(i32) readnone
|
||||
|
||||
declare void @llvm.AMDGPU.store.output(float, i32)
|
||||
|
||||
declare float @fabs(float ) readnone
|
Loading…
Reference in New Issue