forked from OSchip/llvm-project
AMDGPU: Fold more custom nodes to undef
This will help sneak undefs past GVN into the DAG for some tests. Also add missing intrinsic for rsq_legacy, even though the node was already selected to the instruction. Also start passing the debug location to intrinsic errors. llvm-svn: 273181
This commit is contained in:
parent
14dcb042bc
commit
b6d8c37e1a
|
@ -114,6 +114,11 @@ def int_amdgcn_rsq : Intrinsic<
|
|||
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
|
||||
>;
|
||||
|
||||
// Declares the rsq_legacy intrinsic and ties it to the GCC-style builtin name
// __builtin_amdgcn_rsq_legacy. Both type lists contain a single llvm_float_ty,
// so unlike the neighbouring rsq / rsq_clamp definitions (which use
// llvm_anyfloat_ty) this one is not overloaded on the float type. It is
// marked IntrNoMem.
def int_amdgcn_rsq_legacy : GCCBuiltin<"__builtin_amdgcn_rsq_legacy">,
|
||||
Intrinsic<
|
||||
[llvm_float_ty], [llvm_float_ty], [IntrNoMem]
|
||||
>;
|
||||
|
||||
def int_amdgcn_rsq_clamp : Intrinsic<
|
||||
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
|
||||
|
||||
|
|
|
@ -1495,9 +1495,18 @@ SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG,
|
|||
DAG.getValueType(VT));
|
||||
}
|
||||
|
||||
static SDValue emitNonHSAIntrinsicError(SelectionDAG& DAG, EVT VT) {
|
||||
static SDValue emitNonHSAIntrinsicError(SelectionDAG& DAG, SDLoc DL, EVT VT) {
|
||||
DiagnosticInfoUnsupported BadIntrin(*DAG.getMachineFunction().getFunction(),
|
||||
"non-hsa intrinsic with hsa target");
|
||||
"non-hsa intrinsic with hsa target",
|
||||
DL.getDebugLoc());
|
||||
DAG.getContext()->diagnose(BadIntrin);
|
||||
return DAG.getUNDEF(VT);
|
||||
}
|
||||
|
||||
// Emits an "intrinsic not supported on subtarget" diagnostic and returns an
// undef value.
//
// A DiagnosticInfoUnsupported is built for the function currently held by
// DAG's MachineFunction, tagged with the debug location taken from DL, and
// handed to the LLVMContext via diagnose().
//
// DAG - selection DAG used to reach the current function, the context, and to
//       create the returned node.
// DL  - source of the debug location attached to the diagnostic.
// VT  - value type of the undef node that is returned.
//
// Returns DAG.getUNDEF(VT).
//
// NOTE(review): DL is taken by value here; confirm whether the project
// convention is to pass SDLoc by const reference instead.
static SDValue emitRemovedIntrinsicError(SelectionDAG& DAG, SDLoc DL, EVT VT) {
|
||||
DiagnosticInfoUnsupported BadIntrin(*DAG.getMachineFunction().getFunction(),
|
||||
"intrinsic not supported on subtarget",
|
||||
DL.getDebugLoc());
|
||||
DAG.getContext()->diagnose(BadIntrin);
|
||||
return DAG.getUNDEF(VT);
|
||||
}
|
||||
|
@ -1541,6 +1550,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
case Intrinsic::amdgcn_rsq:
|
||||
case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
|
||||
return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
|
||||
case Intrinsic::amdgcn_rsq_legacy: {
|
||||
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
return emitRemovedIntrinsicError(DAG, DL, VT);
|
||||
|
||||
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
|
||||
}
|
||||
case Intrinsic::amdgcn_rsq_clamp:
|
||||
case AMDGPUIntrinsic::AMDGPU_rsq_clamped: { // Legacy name
|
||||
if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
|
@ -1558,55 +1573,55 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
}
|
||||
case Intrinsic::r600_read_ngroups_x:
|
||||
if (Subtarget->isAmdHsaOS())
|
||||
return emitNonHSAIntrinsicError(DAG, VT);
|
||||
return emitNonHSAIntrinsicError(DAG, DL, VT);
|
||||
|
||||
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
|
||||
SI::KernelInputOffsets::NGROUPS_X, false);
|
||||
case Intrinsic::r600_read_ngroups_y:
|
||||
if (Subtarget->isAmdHsaOS())
|
||||
return emitNonHSAIntrinsicError(DAG, VT);
|
||||
return emitNonHSAIntrinsicError(DAG, DL, VT);
|
||||
|
||||
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
|
||||
SI::KernelInputOffsets::NGROUPS_Y, false);
|
||||
case Intrinsic::r600_read_ngroups_z:
|
||||
if (Subtarget->isAmdHsaOS())
|
||||
return emitNonHSAIntrinsicError(DAG, VT);
|
||||
return emitNonHSAIntrinsicError(DAG, DL, VT);
|
||||
|
||||
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
|
||||
SI::KernelInputOffsets::NGROUPS_Z, false);
|
||||
case Intrinsic::r600_read_global_size_x:
|
||||
if (Subtarget->isAmdHsaOS())
|
||||
return emitNonHSAIntrinsicError(DAG, VT);
|
||||
return emitNonHSAIntrinsicError(DAG, DL, VT);
|
||||
|
||||
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
|
||||
SI::KernelInputOffsets::GLOBAL_SIZE_X, false);
|
||||
case Intrinsic::r600_read_global_size_y:
|
||||
if (Subtarget->isAmdHsaOS())
|
||||
return emitNonHSAIntrinsicError(DAG, VT);
|
||||
return emitNonHSAIntrinsicError(DAG, DL, VT);
|
||||
|
||||
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
|
||||
SI::KernelInputOffsets::GLOBAL_SIZE_Y, false);
|
||||
case Intrinsic::r600_read_global_size_z:
|
||||
if (Subtarget->isAmdHsaOS())
|
||||
return emitNonHSAIntrinsicError(DAG, VT);
|
||||
return emitNonHSAIntrinsicError(DAG, DL, VT);
|
||||
|
||||
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
|
||||
SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
|
||||
case Intrinsic::r600_read_local_size_x:
|
||||
if (Subtarget->isAmdHsaOS())
|
||||
return emitNonHSAIntrinsicError(DAG, VT);
|
||||
return emitNonHSAIntrinsicError(DAG, DL, VT);
|
||||
|
||||
return lowerImplicitZextParam(DAG, Op, MVT::i16,
|
||||
SI::KernelInputOffsets::LOCAL_SIZE_X);
|
||||
case Intrinsic::r600_read_local_size_y:
|
||||
if (Subtarget->isAmdHsaOS())
|
||||
return emitNonHSAIntrinsicError(DAG, VT);
|
||||
return emitNonHSAIntrinsicError(DAG, DL, VT);
|
||||
|
||||
return lowerImplicitZextParam(DAG, Op, MVT::i16,
|
||||
SI::KernelInputOffsets::LOCAL_SIZE_Y);
|
||||
case Intrinsic::r600_read_local_size_z:
|
||||
if (Subtarget->isAmdHsaOS())
|
||||
return emitNonHSAIntrinsicError(DAG, VT);
|
||||
return emitNonHSAIntrinsicError(DAG, DL, VT);
|
||||
|
||||
return lowerImplicitZextParam(DAG, Op, MVT::i16,
|
||||
SI::KernelInputOffsets::LOCAL_SIZE_Z);
|
||||
|
@ -2564,6 +2579,9 @@ SDValue SITargetLowering::performClassCombine(SDNode *N,
|
|||
return DAG.getConstant(0, SDLoc(N), MVT::i1);
|
||||
}
|
||||
|
||||
if (N->getOperand(0).isUndef())
|
||||
return DAG.getUNDEF(MVT::i1);
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
@ -2964,6 +2982,17 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
|
|||
return performClassCombine(N, DCI);
|
||||
case ISD::FCANONICALIZE:
|
||||
return performFCanonicalizeCombine(N, DCI);
|
||||
case AMDGPUISD::FRACT:
|
||||
case AMDGPUISD::RCP:
|
||||
case AMDGPUISD::RSQ:
|
||||
case AMDGPUISD::RSQ_LEGACY:
|
||||
case AMDGPUISD::RSQ_CLAMP:
|
||||
case AMDGPUISD::LDEXP: {
|
||||
SDValue Src = N->getOperand(0);
|
||||
if (Src.isUndef())
|
||||
return Src;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare i1 @llvm.amdgcn.class.f32(float, i32) #1
|
||||
declare i1 @llvm.amdgcn.class.f64(double, i32) #1
|
||||
|
@ -495,5 +495,17 @@ define void @test_class_0_f64(i32 addrspace(1)* %out, double %a) #0 {
|
|||
ret void
|
||||
}
|
||||
|
||||
; FIXME: Why is the extension still here?
|
||||
; SI-LABEL: {{^}}test_class_undef_f32:
|
||||
; SI-NOT: v_cmp_class
|
||||
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1,
|
||||
; SI: buffer_store_dword
|
||||
define void @test_class_undef_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
|
||||
%result = call i1 @llvm.amdgcn.class.f32(float undef, i32 %b) #1
|
||||
%sext = sext i1 %result to i32
|
||||
store i32 %sext, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
|
|
@ -20,5 +20,15 @@ define void @v_fract_f64(double addrspace(1)* %out, double %src) #1 {
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_fract_undef_f32:
|
||||
; GCN-NOT: v_fract_f32
|
||||
; GCN-NOT: v0
|
||||
; GCN: buffer_store_dword v0
|
||||
define void @v_fract_undef_f32(float addrspace(1)* %out) #1 {
|
||||
%fract = call float @llvm.amdgcn.fract.f32(float undef)
|
||||
store float %fract, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare float @llvm.amdgcn.ldexp.f32(float, i32) nounwind readnone
|
||||
|
@ -42,3 +42,11 @@ define void @test_legacy_ldexp_f64(double addrspace(1)* %out, double %a, i32 %b)
|
|||
store double %result, double addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}test_ldexp_undef_f32:
|
||||
; SI-NOT: v_ldexp_f32
|
||||
define void @test_ldexp_undef_f32(float addrspace(1)* %out, i32 %b) nounwind {
|
||||
%result = call float @llvm.amdgcn.ldexp.f32(float undef, i32 %b) nounwind readnone
|
||||
store float %result, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; XUN: llc -march=amdgcn -mcpu=SI -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; XUN: llc -march=amdgcn -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; XUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
@ -69,5 +69,13 @@ define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
|
|||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}rcp_undef_f32:
|
||||
; SI-NOT: v_rcp_f32
|
||||
define void @rcp_undef_f32(float addrspace(1)* %out) #1 {
|
||||
%rcp = call float @llvm.amdgcn.rcp.f32(float undef) #0
|
||||
store float %rcp, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
|
|
|
@ -38,5 +38,13 @@ define void @rsq_clamp_f64(double addrspace(1)* %out, double %src) #0 {
|
|||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}rsq_clamp_undef_f32:
|
||||
; SI-NOT: v_rsq_clamp_f32
|
||||
define void @rsq_clamp_undef_f32(float addrspace(1)* %out) #0 {
|
||||
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float undef)
|
||||
store float %rsq_clamp, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare float @llvm.amdgcn.rsq.legacy(float) #0
|
||||
|
||||
; FUNC-LABEL: {{^}}rsq_legacy_f32:
|
||||
; SI: v_rsq_legacy_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
|
||||
define void @rsq_legacy_f32(float addrspace(1)* %out, float %src) #1 {
|
||||
%rsq = call float @llvm.amdgcn.rsq.legacy(float %src) #0
|
||||
store float %rsq, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; TODO: Really these should be constant folded
|
||||
; FUNC-LABEL: {{^}}rsq_legacy_f32_constant_4.0
|
||||
; SI: v_rsq_legacy_f32_e32 {{v[0-9]+}}, 4.0
|
||||
define void @rsq_legacy_f32_constant_4.0(float addrspace(1)* %out) #1 {
|
||||
%rsq = call float @llvm.amdgcn.rsq.legacy(float 4.0) #0
|
||||
store float %rsq, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}rsq_legacy_f32_constant_100.0
|
||||
; SI: v_rsq_legacy_f32_e32 {{v[0-9]+}}, 0x42c80000
|
||||
define void @rsq_legacy_f32_constant_100.0(float addrspace(1)* %out) #1 {
|
||||
%rsq = call float @llvm.amdgcn.rsq.legacy(float 100.0) #0
|
||||
store float %rsq, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}rsq_legacy_undef_f32:
|
||||
; SI-NOT: v_rsq_legacy_f32
|
||||
define void @rsq_legacy_undef_f32(float addrspace(1)* %out) #1 {
|
||||
%rsq = call float @llvm.amdgcn.rsq.legacy(float undef)
|
||||
store float %rsq, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare float @llvm.amdgcn.rsq.f32(float) #0
|
||||
|
@ -56,5 +56,13 @@ define void @rsq_f64_constant_100.0(double addrspace(1)* %out) #1 {
|
|||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}rsq_undef_f32:
|
||||
; SI-NOT: v_rsq_f32
|
||||
define void @rsq_undef_f32(float addrspace(1)* %out) #1 {
|
||||
%rsq = call float @llvm.amdgcn.rsq.f32(float undef)
|
||||
store float %rsq, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
; RUN: not llc -march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck -check-prefix=ERROR %s
|
||||
|
||||
; ERROR: error: :1:42: in function rsq_legacy_f32 void (float addrspace(1)*, float): intrinsic not supported on subtarget
|
||||
|
||||
declare float @llvm.amdgcn.rsq.legacy(float) #0
|
||||
|
||||
define void @rsq_legacy_f32(float addrspace(1)* %out, float %src) #1 {
|
||||
%rsq = call float @llvm.amdgcn.rsq.legacy(float %src), !dbg !4
|
||||
store float %rsq, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!2, !3}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_OpenCL, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug)
|
||||
!1 = !DIFile(filename: "foo.cl", directory: "/dev/null")
|
||||
!2 = !{i32 2, !"Dwarf Version", i32 4}
|
||||
!3 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!4 = !DILocation(line: 1, column: 42, scope: !5)
|
||||
!5 = distinct !DISubprogram(name: "rsq_legacy_f32", scope: null, line: 1, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, unit: !0)
|
Loading…
Reference in New Issue