forked from OSchip/llvm-project
AMDGPU: Add intrinsics for sin/cos
These provide direct access to the hardware instructions without the unit conversion that the llvm.sin/llvm.cos lowering requires. llvm-svn: 260782
This commit is contained in:
parent
f5c1f47181
commit
ce56a0ef54
|
@ -92,6 +92,18 @@ def int_amdgcn_trig_preop : Intrinsic<
|
||||||
[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]
|
[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
def int_amdgcn_sin : Intrinsic<
|
||||||
|
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
|
||||||
|
>;
|
||||||
|
|
||||||
|
def int_amdgcn_cos : Intrinsic<
|
||||||
|
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
|
||||||
|
>;
|
||||||
|
|
||||||
|
def int_amdgcn_log_clamp : Intrinsic<
|
||||||
|
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
|
||||||
|
>;
|
||||||
|
|
||||||
def int_amdgcn_rcp : Intrinsic<
|
def int_amdgcn_rcp : Intrinsic<
|
||||||
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
|
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
|
||||||
>;
|
>;
|
||||||
|
|
|
@ -1535,6 +1535,22 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||||
Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
|
Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
|
||||||
Glue);
|
Glue);
|
||||||
}
|
}
|
||||||
|
case Intrinsic::amdgcn_sin:
|
||||||
|
return DAG.getNode(AMDGPUISD::SIN_HW, DL, VT, Op.getOperand(1));
|
||||||
|
|
||||||
|
case Intrinsic::amdgcn_cos:
|
||||||
|
return DAG.getNode(AMDGPUISD::COS_HW, DL, VT, Op.getOperand(1));
|
||||||
|
|
||||||
|
case Intrinsic::amdgcn_log_clamp: {
|
||||||
|
if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
DiagnosticInfoUnsupported BadIntrin(
|
||||||
|
*MF.getFunction(), "intrinsic not supported on subtarget",
|
||||||
|
DL.getDebugLoc());
|
||||||
|
DAG.getContext()->diagnose(BadIntrin);
|
||||||
|
return DAG.getUNDEF(VT);
|
||||||
|
}
|
||||||
case Intrinsic::amdgcn_ldexp:
|
case Intrinsic::amdgcn_ldexp:
|
||||||
return DAG.getNode(AMDGPUISD::LDEXP, DL, VT,
|
return DAG.getNode(AMDGPUISD::LDEXP, DL, VT,
|
||||||
Op.getOperand(1), Op.getOperand(2));
|
Op.getOperand(1), Op.getOperand(2));
|
||||||
|
|
|
@ -1349,7 +1349,8 @@ let SubtargetPredicate = isSICI in {
|
||||||
let SchedRW = [WriteQuarterRate32] in {
|
let SchedRW = [WriteQuarterRate32] in {
|
||||||
|
|
||||||
defm V_MOV_FED_B32 : VOP1InstSI <vop1<0x9>, "v_mov_fed_b32", VOP_I32_I32>;
|
defm V_MOV_FED_B32 : VOP1InstSI <vop1<0x9>, "v_mov_fed_b32", VOP_I32_I32>;
|
||||||
defm V_LOG_CLAMP_F32 : VOP1InstSI <vop1<0x26>, "v_log_clamp_f32", VOP_F32_F32>;
|
defm V_LOG_CLAMP_F32 : VOP1InstSI <vop1<0x26>, "v_log_clamp_f32",
|
||||||
|
VOP_F32_F32, int_amdgcn_log_clamp>;
|
||||||
defm V_RCP_CLAMP_F32 : VOP1InstSI <vop1<0x28>, "v_rcp_clamp_f32", VOP_F32_F32>;
|
defm V_RCP_CLAMP_F32 : VOP1InstSI <vop1<0x28>, "v_rcp_clamp_f32", VOP_F32_F32>;
|
||||||
defm V_RCP_LEGACY_F32 : VOP1InstSI <vop1<0x29>, "v_rcp_legacy_f32", VOP_F32_F32>;
|
defm V_RCP_LEGACY_F32 : VOP1InstSI <vop1<0x29>, "v_rcp_legacy_f32", VOP_F32_F32>;
|
||||||
defm V_RSQ_CLAMP_F32 : VOP1InstSI <vop1<0x2c>, "v_rsq_clamp_f32",
|
defm V_RSQ_CLAMP_F32 : VOP1InstSI <vop1<0x2c>, "v_rsq_clamp_f32",
|
||||||
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
|
||||||
|
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
|
||||||
|
|
||||||
|
declare float @llvm.amdgcn.cos.f32(float) #0
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}v_cos_f32:
|
||||||
|
; GCN: v_cos_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
|
||||||
|
define void @v_cos_f32(float addrspace(1)* %out, float %src) #1 {
|
||||||
|
%cos = call float @llvm.amdgcn.cos.f32(float %src) #0
|
||||||
|
store float %cos, float addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind readnone }
|
||||||
|
attributes #1 = { nounwind }
|
|
@ -0,0 +1,17 @@
|
||||||
|
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
|
||||||
|
; RUN: not llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERR %s
|
||||||
|
|
||||||
|
; ERR: intrinsic not supported on subtarget
|
||||||
|
|
||||||
|
declare float @llvm.amdgcn.log.clamp.f32(float) #0
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}v_log_clamp_f32:
|
||||||
|
; GCN: v_log_clamp_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
|
||||||
|
define void @v_log_clamp_f32(float addrspace(1)* %out, float %src) #1 {
|
||||||
|
%log.clamp = call float @llvm.amdgcn.log.clamp.f32(float %src) #0
|
||||||
|
store float %log.clamp, float addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind readnone }
|
||||||
|
attributes #1 = { nounwind }
|
|
@ -0,0 +1,15 @@
|
||||||
|
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
|
||||||
|
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
|
||||||
|
|
||||||
|
declare float @llvm.amdgcn.sin.f32(float) #0
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}v_sin_f32:
|
||||||
|
; GCN: v_sin_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
|
||||||
|
define void @v_sin_f32(float addrspace(1)* %out, float %src) #1 {
|
||||||
|
%sin = call float @llvm.amdgcn.sin.f32(float %src) #0
|
||||||
|
store float %sin, float addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind readnone }
|
||||||
|
attributes #1 = { nounwind }
|
Loading…
Reference in New Issue