forked from OSchip/llvm-project
R600/SI: Fix div_scale intrinsic.
Which operand must match one of the others does matter, so add a parameter to choose it and implement instruction selection for it. llvm-svn: 211523
This commit is contained in:
parent
1f83db17c0
commit
f2b0aebb8a
|
@ -38,8 +38,13 @@ defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
|
|||
|
||||
let TargetPrefix = "AMDGPU" in {
|
||||
def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">,
|
||||
// 1st parameter: Numerator
|
||||
// 2nd parameter: Denominator
|
||||
// 3rd parameter: Constant to select between first and
|
||||
// second. (0 = Denominator, 1 = Numerator).
|
||||
Intrinsic<[llvm_anyfloat_ty, llvm_i1_ty],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_AMDGPU_div_fmas : GCCBuiltin<"__builtin_amdgpu_div_fmas">,
|
||||
Intrinsic<[llvm_anyfloat_ty],
|
||||
|
|
|
@ -86,6 +86,7 @@ private:
|
|||
bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
|
||||
|
||||
SDNode *SelectADD_SUB_I64(SDNode *N);
|
||||
SDNode *SelectDIV_SCALE(SDNode *N);
|
||||
|
||||
// Include the pieces autogenerated from the target description.
|
||||
#include "AMDGPUGenDAGISel.inc"
|
||||
|
@ -454,6 +455,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
|||
PackedOffsetWidth);
|
||||
|
||||
}
|
||||
case AMDGPUISD::DIV_SCALE: {
|
||||
return SelectDIV_SCALE(N);
|
||||
}
|
||||
}
|
||||
return SelectCode(N);
|
||||
}
|
||||
|
@ -695,6 +699,30 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
|
|||
return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
|
||||
}
|
||||
|
||||
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
|
||||
SDLoc SL(N);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
assert(VT == MVT::f32 || VT == MVT::f64);
|
||||
|
||||
unsigned Opc
|
||||
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
|
||||
|
||||
const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
|
||||
|
||||
SDValue Ops[] = {
|
||||
N->getOperand(0),
|
||||
N->getOperand(1),
|
||||
N->getOperand(2),
|
||||
Zero,
|
||||
Zero,
|
||||
Zero,
|
||||
Zero
|
||||
};
|
||||
|
||||
return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
|
||||
}
|
||||
|
||||
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
|
||||
const AMDGPUTargetLowering& Lowering =
|
||||
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
|
||||
|
|
|
@ -771,9 +771,21 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
|
||||
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
|
||||
|
||||
case Intrinsic::AMDGPU_div_scale:
|
||||
case Intrinsic::AMDGPU_div_scale: {
|
||||
// 3rd parameter required to be a constant.
|
||||
const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3));
|
||||
if (!Param)
|
||||
return DAG.getUNDEF(VT);
|
||||
|
||||
// Translate to the operands expected by the machine instruction. The
|
||||
// first operand must be the same as either the numerator or the denominator.
|
||||
SDValue Numerator = Op.getOperand(1);
|
||||
SDValue Denominator = Op.getOperand(2);
|
||||
SDValue Src0 = Param->isAllOnesValue() ? Numerator : Denominator;
|
||||
|
||||
return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, VT,
|
||||
Op.getOperand(1), Op.getOperand(2));
|
||||
Src0, Denominator, Numerator);
|
||||
}
|
||||
|
||||
case Intrinsic::AMDGPU_div_fmas:
|
||||
return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
|
||||
|
|
|
@ -433,6 +433,22 @@ class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
|
|||
opName#" $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers, $clamp, $omod", pattern
|
||||
>, VOP <opName>;
|
||||
|
||||
|
||||
class VOP3b_Helper <bits<9> op, RegisterClass vrc, RegisterClass arc,
|
||||
string opName, list<dag> pattern> : VOP3 <
|
||||
op, (outs vrc:$dst0, SReg_64:$dst1),
|
||||
(ins arc:$src0, arc:$src1, arc:$src2,
|
||||
InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
|
||||
opName#" $dst0, $dst1, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
|
||||
>, VOP <opName>;
|
||||
|
||||
|
||||
class VOP3b_64 <bits<9> op, string opName, list<dag> pattern> :
|
||||
VOP3b_Helper <op, VReg_64, VSrc_64, opName, pattern>;
|
||||
|
||||
class VOP3b_32 <bits<9> op, string opName, list<dag> pattern> :
|
||||
VOP3b_Helper <op, VReg_32, VSrc_32, opName, pattern>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Vector I/O classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -1455,8 +1455,10 @@ defm V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
|
|||
|
||||
} // isCommutable = 1
|
||||
|
||||
defm V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
|
||||
def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
|
||||
def V_DIV_SCALE_F32 : VOP3b_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
|
||||
|
||||
// Double precision division pre-scale.
|
||||
def V_DIV_SCALE_F64 : VOP3b_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
|
||||
|
||||
defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32",
|
||||
[(set f32:$dst, (AMDGPUdiv_fmas f32:$src0, f32:$src1, f32:$src2))]
|
||||
|
|
|
@ -1,23 +1,48 @@
|
|||
; XFAIL: *
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare float @llvm.AMDGPU.div.scale.f32(float, float) nounwind readnone
|
||||
declare double @llvm.AMDGPU.div.scale.f64(double, double) nounwind readnone
|
||||
declare { float, i1 } @llvm.AMDGPU.div.scale.f32(float, float, i1) nounwind readnone
|
||||
declare { double, i1 } @llvm.AMDGPU.div.scale.f64(double, double, i1) nounwind readnone
|
||||
|
||||
; SI-LABEL @test_div_scale_f32:
|
||||
define void @test_div_scale_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind {
|
||||
; SI-LABEL @test_div_scale_f32_1:
|
||||
; SI: V_DIV_SCALE_F32
|
||||
define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind {
|
||||
%a = load float addrspace(1)* %aptr, align 4
|
||||
%b = load float addrspace(1)* %bptr, align 4
|
||||
%result = call float @llvm.AMDGPU.div.scale.f32(float %a, float %b) nounwind readnone
|
||||
store float %result, float addrspace(1)* %out, align 4
|
||||
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
|
||||
%result0 = extractvalue { float, i1 } %result, 0
|
||||
store float %result0, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL @test_div_scale_f64:
|
||||
define void @test_div_scale_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr) nounwind {
|
||||
%a = load double addrspace(1)* %aptr, align 8
|
||||
%b = load double addrspace(1)* %bptr, align 8
|
||||
%result = call double @llvm.AMDGPU.div.scale.f64(double %a, double %b) nounwind readnone
|
||||
store double %result, double addrspace(1)* %out, align 8
|
||||
; SI-LABEL @test_div_scale_f32_2:
|
||||
; SI: V_DIV_SCALE_F32
|
||||
define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind {
|
||||
%a = load float addrspace(1)* %aptr, align 4
|
||||
%b = load float addrspace(1)* %bptr, align 4
|
||||
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
|
||||
%result0 = extractvalue { float, i1 } %result, 0
|
||||
store float %result0, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL @test_div_scale_f64_1:
|
||||
; SI: V_DIV_SCALE_F64
|
||||
define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr, double addrspace(1)* %cptr) nounwind {
|
||||
%a = load double addrspace(1)* %aptr, align 8
|
||||
%b = load double addrspace(1)* %bptr, align 8
|
||||
%result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
|
||||
%result0 = extractvalue { double, i1 } %result, 0
|
||||
store double %result0, double addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL @test_div_scale_f64_2:
|
||||
; SI: V_DIV_SCALE_F64
|
||||
define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr, double addrspace(1)* %cptr) nounwind {
|
||||
%a = load double addrspace(1)* %aptr, align 8
|
||||
%b = load double addrspace(1)* %bptr, align 8
|
||||
%result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
|
||||
%result0 = extractvalue { double, i1 } %result, 0
|
||||
store double %result0, double addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue