AMDGPU: Fold more custom nodes to undef

This will help sneak undefs past GVN into the DAG for
some tests.

Also add missing intrinsic for rsq_legacy, even though the node
was already selected to the instruction. Also start passing
the debug location to intrinsic errors.

llvm-svn: 273181
This commit is contained in:
Matt Arsenault 2016-06-20 18:33:56 +00:00
parent 14dcb042bc
commit b6d8c37e1a
10 changed files with 168 additions and 17 deletions

View File

@ -114,6 +114,11 @@ def int_amdgcn_rsq : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
>;
def int_amdgcn_rsq_legacy : GCCBuiltin<"__builtin_amdgcn_rsq_legacy">,
Intrinsic<
[llvm_float_ty], [llvm_float_ty], [IntrNoMem]
>;
def int_amdgcn_rsq_clamp : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

View File

@ -1495,9 +1495,18 @@ SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG,
DAG.getValueType(VT));
}
static SDValue emitNonHSAIntrinsicError(SelectionDAG& DAG, EVT VT) {
static SDValue emitNonHSAIntrinsicError(SelectionDAG& DAG, SDLoc DL, EVT VT) {
DiagnosticInfoUnsupported BadIntrin(*DAG.getMachineFunction().getFunction(),
"non-hsa intrinsic with hsa target");
"non-hsa intrinsic with hsa target",
DL.getDebugLoc());
DAG.getContext()->diagnose(BadIntrin);
return DAG.getUNDEF(VT);
}
static SDValue emitRemovedIntrinsicError(SelectionDAG& DAG, SDLoc DL, EVT VT) {
DiagnosticInfoUnsupported BadIntrin(*DAG.getMachineFunction().getFunction(),
"intrinsic not supported on subtarget",
DL.getDebugLoc());
DAG.getContext()->diagnose(BadIntrin);
return DAG.getUNDEF(VT);
}
@ -1541,6 +1550,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::amdgcn_rsq:
case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_rsq_legacy: {
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return emitRemovedIntrinsicError(DAG, DL, VT);
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
}
case Intrinsic::amdgcn_rsq_clamp:
case AMDGPUIntrinsic::AMDGPU_rsq_clamped: { // Legacy name
if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
@ -1558,55 +1573,55 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::r600_read_ngroups_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, VT);
return emitNonHSAIntrinsicError(DAG, DL, VT);
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::NGROUPS_X, false);
case Intrinsic::r600_read_ngroups_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, VT);
return emitNonHSAIntrinsicError(DAG, DL, VT);
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::NGROUPS_Y, false);
case Intrinsic::r600_read_ngroups_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, VT);
return emitNonHSAIntrinsicError(DAG, DL, VT);
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::NGROUPS_Z, false);
case Intrinsic::r600_read_global_size_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, VT);
return emitNonHSAIntrinsicError(DAG, DL, VT);
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::GLOBAL_SIZE_X, false);
case Intrinsic::r600_read_global_size_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, VT);
return emitNonHSAIntrinsicError(DAG, DL, VT);
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::GLOBAL_SIZE_Y, false);
case Intrinsic::r600_read_global_size_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, VT);
return emitNonHSAIntrinsicError(DAG, DL, VT);
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
case Intrinsic::r600_read_local_size_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, VT);
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_X);
case Intrinsic::r600_read_local_size_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, VT);
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_Y);
case Intrinsic::r600_read_local_size_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, VT);
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_Z);
@ -2564,6 +2579,9 @@ SDValue SITargetLowering::performClassCombine(SDNode *N,
return DAG.getConstant(0, SDLoc(N), MVT::i1);
}
if (N->getOperand(0).isUndef())
return DAG.getUNDEF(MVT::i1);
return SDValue();
}
@ -2964,6 +2982,17 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performClassCombine(N, DCI);
case ISD::FCANONICALIZE:
return performFCanonicalizeCombine(N, DCI);
case AMDGPUISD::FRACT:
case AMDGPUISD::RCP:
case AMDGPUISD::RSQ:
case AMDGPUISD::RSQ_LEGACY:
case AMDGPUISD::RSQ_CLAMP:
case AMDGPUISD::LDEXP: {
SDValue Src = N->getOperand(0);
if (Src.isUndef())
return Src;
break;
}
}
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}

View File

@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare i1 @llvm.amdgcn.class.f32(float, i32) #1
declare i1 @llvm.amdgcn.class.f64(double, i32) #1
@ -495,5 +495,17 @@ define void @test_class_0_f64(i32 addrspace(1)* %out, double %a) #0 {
ret void
}
; FIXME: Why is the extension still here?
; SI-LABEL: {{^}}test_class_undef_f32:
; SI-NOT: v_cmp_class
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1,
; SI: buffer_store_dword
define void @test_class_undef_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
%result = call i1 @llvm.amdgcn.class.f32(float undef, i32 %b) #1
%sext = sext i1 %result to i32
store i32 %sext, i32 addrspace(1)* %out, align 4
ret void
}
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@ -20,5 +20,15 @@ define void @v_fract_f64(double addrspace(1)* %out, double %src) #1 {
ret void
}
; GCN-LABEL: {{^}}v_fract_undef_f32:
; GCN-NOT: v_fract_f32
; GCN-NOT: v0
; GCN: buffer_store_dword v0
define void @v_fract_undef_f32(float addrspace(1)* %out) #1 {
%fract = call float @llvm.amdgcn.fract.f32(float undef)
store float %fract, float addrspace(1)* %out
ret void
}
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

View File

@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare float @llvm.amdgcn.ldexp.f32(float, i32) nounwind readnone
@ -42,3 +42,11 @@ define void @test_legacy_ldexp_f64(double addrspace(1)* %out, double %a, i32 %b)
store double %result, double addrspace(1)* %out, align 8
ret void
}
; SI-LABEL: {{^}}test_ldexp_undef_f32:
; SI-NOT: v_ldexp_f32
define void @test_ldexp_undef_f32(float addrspace(1)* %out, i32 %b) nounwind {
%result = call float @llvm.amdgcn.ldexp.f32(float undef, i32 %b) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}

View File

@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
; XUN: llc -march=amdgcn -mcpu=SI -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
; XUN: llc -march=amdgcn -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
; XUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
@ -69,5 +69,13 @@ define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
ret void
}
; FUNC-LABEL: {{^}}rcp_undef_f32:
; SI-NOT: v_rcp_f32
define void @rcp_undef_f32(float addrspace(1)* %out) #1 {
%rcp = call float @llvm.amdgcn.rcp.f32(float undef) #0
store float %rcp, float addrspace(1)* %out, align 4
ret void
}
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

View File

@ -38,5 +38,13 @@ define void @rsq_clamp_f64(double addrspace(1)* %out, double %src) #0 {
ret void
}
; FUNC-LABEL: {{^}}rsq_clamp_undef_f32:
; SI-NOT: v_rsq_clamp_f32
define void @rsq_clamp_undef_f32(float addrspace(1)* %out) #0 {
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float undef)
store float %rsq_clamp, float addrspace(1)* %out
ret void
}
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

View File

@ -0,0 +1,39 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare float @llvm.amdgcn.rsq.legacy(float) #0
; FUNC-LABEL: {{^}}rsq_legacy_f32:
; SI: v_rsq_legacy_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
define void @rsq_legacy_f32(float addrspace(1)* %out, float %src) #1 {
%rsq = call float @llvm.amdgcn.rsq.legacy(float %src) #0
store float %rsq, float addrspace(1)* %out, align 4
ret void
}
; TODO: Really these should be constant folded
; FUNC-LABEL: {{^}}rsq_legacy_f32_constant_4.0
; SI: v_rsq_legacy_f32_e32 {{v[0-9]+}}, 4.0
define void @rsq_legacy_f32_constant_4.0(float addrspace(1)* %out) #1 {
%rsq = call float @llvm.amdgcn.rsq.legacy(float 4.0) #0
store float %rsq, float addrspace(1)* %out, align 4
ret void
}
; FUNC-LABEL: {{^}}rsq_legacy_f32_constant_100.0
; SI: v_rsq_legacy_f32_e32 {{v[0-9]+}}, 0x42c80000
define void @rsq_legacy_f32_constant_100.0(float addrspace(1)* %out) #1 {
%rsq = call float @llvm.amdgcn.rsq.legacy(float 100.0) #0
store float %rsq, float addrspace(1)* %out, align 4
ret void
}
; FUNC-LABEL: {{^}}rsq_legacy_undef_f32:
; SI-NOT: v_rsq_legacy_f32
define void @rsq_legacy_undef_f32(float addrspace(1)* %out) #1 {
%rsq = call float @llvm.amdgcn.rsq.legacy(float undef)
store float %rsq, float addrspace(1)* %out, align 4
ret void
}
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

View File

@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare float @llvm.amdgcn.rsq.f32(float) #0
@ -56,5 +56,13 @@ define void @rsq_f64_constant_100.0(double addrspace(1)* %out) #1 {
ret void
}
; FUNC-LABEL: {{^}}rsq_undef_f32:
; SI-NOT: v_rsq_f32
define void @rsq_undef_f32(float addrspace(1)* %out) #1 {
%rsq = call float @llvm.amdgcn.rsq.f32(float undef)
store float %rsq, float addrspace(1)* %out, align 4
ret void
}
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

View File

@ -0,0 +1,24 @@
; RUN: not llc -march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck -check-prefix=ERROR %s
; ERROR: error: :1:42: in function rsq_legacy_f32 void (float addrspace(1)*, float): intrinsic not supported on subtarget
declare float @llvm.amdgcn.rsq.legacy(float) #0
define void @rsq_legacy_f32(float addrspace(1)* %out, float %src) #1 {
%rsq = call float @llvm.amdgcn.rsq.legacy(float %src), !dbg !4
store float %rsq, float addrspace(1)* %out, align 4
ret void
}
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3}
!0 = distinct !DICompileUnit(language: DW_LANG_OpenCL, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug)
!1 = !DIFile(filename: "foo.cl", directory: "/dev/null")
!2 = !{i32 2, !"Dwarf Version", i32 4}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !DILocation(line: 1, column: 42, scope: !5)
!5 = distinct !DISubprogram(name: "rsq_legacy_f32", scope: null, line: 1, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, unit: !0)