From ed633a1daad97044063934cf28985491fbed7b4c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 9 Apr 2021 18:40:59 -0400 Subject: [PATCH] AMDGPU: Restore atomic fp feature on FP atomic instruction definitions 9931b1f7a4785b6a17fb87b81a3546d61d0cbca1 switched this to checking for the two specific subtargets, instead of the dedicated feature. This broke supporting functions which force added the feature when emitting targets that do not actually support them. This still does not work for the targets that use the gfx6/7 or gfx10 encodings. --- llvm/lib/Target/AMDGPU/FLATInstructions.td | 2 +- .../global-atomics-fp-wrong-subtarget.ll | 26 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index e52dc80ac8af..74a9db0f1c4b 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -1539,7 +1539,7 @@ defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>; defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>; defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>; -let SubtargetPredicate = isGFX908orGFX90A in { +let SubtargetPredicate = HasAtomicFaddInsts in { defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d, 0>; defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>; } diff --git a/llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll b/llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll new file mode 100644 index 000000000000..c76a1d07a81b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll @@ -0,0 +1,26 @@ +; RUN: llc -march=amdgcn -mcpu=gfx803 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx803 -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx803 -d - | FileCheck 
-check-prefix=DISASSEMBLY-VI %s + +; Make sure we can encode and don't fail on functions which have +; instructions not actually supported by the subtarget. +; FIXME: This will still fail for gfx6/7 and gfx10 subtargets. + +; DISASSEMBLY-VI: .long 0xdd348000 // {{[0-9]+}}: DD348000 +; DISASSEMBLY-VI-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc // {{[0-9]+}}: 00000100 + +define amdgpu_kernel void @global_atomic_fadd_noret_f32_wrong_subtarget(float addrspace(1)* %ptr) #0 { +; GCN-LABEL: global_atomic_fadd_noret_f32_wrong_subtarget: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: v_mov_b32_e32 v1, 4.0 +; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: global_atomic_add_f32 v0, v1, s[0:1] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_wbinvl1_vol +; GCN-NEXT: s_endpgm + %result = atomicrmw fadd float addrspace(1)* %ptr, float 4.0 syncscope("agent") seq_cst + ret void +} + +attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="+atomic-fadd-insts" "amdgpu-unsafe-fp-atomics"="true" }