AMDGPU/GlobalISel: Directly diagnose return value use for FP atomics

Emit an error if the return value is used on subtargets that do not support them. Previously we were falling back to the DAG on selection failure, where it would emit this error and then fail again.
2022-01-19 11:06:25 -05:00 · 2022-01-19 11:06:25 -05:00 · 2e49e0cfde
parent 4822447522
commit 2e49e0cfde
3 changed files with 22 additions and 5 deletions
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@ -5181,8 +5181,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
  case Intrinsic::amdgcn_struct_buffer_atomic_inc:
  case Intrinsic::amdgcn_raw_buffer_atomic_dec:
  case Intrinsic::amdgcn_struct_buffer_atomic_dec:
  case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
  case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
  case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap:
  case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap:
  case Intrinsic::amdgcn_raw_buffer_atomic_fmin:
@ -5190,6 +5188,22 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
  case Intrinsic::amdgcn_raw_buffer_atomic_fmax:
  case Intrinsic::amdgcn_struct_buffer_atomic_fmax:
    return legalizeBufferAtomic(MI, B, IntrID);
  case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
  case Intrinsic::amdgcn_struct_buffer_atomic_fadd: {
    Register DstReg = MI.getOperand(0).getReg();
    if (!MRI.use_empty(DstReg) && !ST.hasGFX90AInsts()) {
      Function &F = B.getMF().getFunction();
      DiagnosticInfoUnsupported NoFpRet(
          F, "return versions of fp atomics not supported", B.getDebugLoc(),
          DS_Error);
      F.getContext().diagnose(NoFpRet);
      B.buildUndef(DstReg);
      MI.eraseFromParent();
      return true;
    }
    return legalizeBufferAtomic(MI, B, IntrID);
  }
  case Intrinsic::amdgcn_atomic_inc:
    return legalizeAtomicIncDec(MI, B, true);
  case Intrinsic::amdgcn_atomic_dec:
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll
@ -1,9 +1,10 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s
-; RUN: not --crash llc -global-isel < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908
+; RUN: not llc -global-isel < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908
 declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg)
 declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
 ; GFX908: error: {{.*}} return versions of fp atomics not supported
 ; GFX908: error: {{.*}} return versions of fp atomics not supported
 ; GFX90A-LABEL: {{^}}buffer_atomic_add_f32_rtn:
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll
@ -1,10 +1,12 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s
-; RUN: not --crash llc -global-isel < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908
+; RUN: not llc -global-isel < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908
 ; GFX908: error: {{.*}} return versions of fp atomics not supported
 ; GFX908: error: {{.*}} return versions of fp atomics not supported
 declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg)
 declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
 ; GFX908: error: {{.*}} return versions of fp atomics not supported
 ; GFX90A-LABEL: {{^}}buffer_atomic_add_f32_rtn:
 ; GFX90A: buffer_atomic_add_f32 v{{[0-9]+}}, v[{{[0-9:]+}}], s[{{[0-9:]+}}], s{{[0-9]+}} idxen offen glc