llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll

; RUN: llc < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs | FileCheck %s -check-prefix=GCN
; RUN: llc < %s -march=amdgcn -mcpu=gfx90a -verify-machineinstrs | FileCheck %s -check-prefix=GCN

; Intrinsics under test: buffer and global floating-point atomic add, each in
; an f32 form and a packed <2 x half> form. All four are declared with a
; return value; every test below leaves that result unused.
declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1)
declare <2 x half> @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i1)
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)*, float)
declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)*, <2 x half>)

; Buffer f32 atomic add, base case: the i32 operand after %vindex is 0 and the
; trailing i1 operand is 0. The result %ret is never used. The expected
; instruction pins the exact registers (v0, v1, s[0:3]) and the idxen modifier.
; GCN-LABEL: {{^}}buffer_atomic_add_f32:
; GCN: buffer_atomic_add_f32 v0, v1, s[0:3], 0 idxen
define amdgpu_ps void @buffer_atomic_add_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
main_body:
  %ret = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
  ret void
}

; Same as @buffer_atomic_add_f32 but with the i32 operand set to 4 and the
; trailing i1 operand set to 1. The test expects these to show up on the
; instruction as "offset:4" and "slc" respectively.
; GCN-LABEL: {{^}}buffer_atomic_add_f32_off4_slc:
; GCN: buffer_atomic_add_f32 v0, v1, s[0:3], 0 idxen offset:4 slc
define amdgpu_ps void @buffer_atomic_add_f32_off4_slc(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
main_body:
  %ret = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)
  ret void
}

; Buffer packed-half atomic add, base case: <2 x half> data, i32 operand 0 and
; i1 operand 0. The result %ret is never used. The expected instruction is the
; pk_add_f16 form with the same fixed registers as the f32 test.
; GCN-LABEL: {{^}}buffer_atomic_pk_add_v2f16:
; GCN: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 idxen
define amdgpu_ps void @buffer_atomic_pk_add_v2f16(<4 x i32> inreg %rsrc, <2 x half> %data, i32 %vindex) {
main_body:
  %ret = call <2 x half> @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
  ret void
}

; Packed-half buffer atomic add with the i32 operand set to 4 and the i1
; operand set to 1; the test expects "offset:4 slc" on the pk_add_f16
; instruction. The result %ret is never used.
; GCN-LABEL: {{^}}buffer_atomic_pk_add_v2f16_off4_slc:
; GCN: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 idxen offset:4 slc
define amdgpu_ps void @buffer_atomic_pk_add_v2f16_off4_slc(<4 x i32> inreg %rsrc, <2 x half> %data, i32 %vindex) {
main_body:
  %ret = call <2 x half> @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)
  ret void
}

; Global f32 atomic add directly through the kernel's pointer argument, with
; no address arithmetic. The result %ret is never used. The expected
; instruction takes an SGPR pair as its base; the pattern is anchored at end
; of line so that an unexpected trailing modifier (for example a stray
; offset) fails the test, matching the anchored pk_add_f16 variant.
; GCN-LABEL: {{^}}global_atomic_add_f32:
; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @global_atomic_add_f32(float addrspace(1)* %ptr, float %data) {
main_body:
  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data)
  ret void
}

; Global f32 atomic add through a pointer advanced by one float element
; (+4 bytes). The test expects the displacement to be folded into the
; instruction as "offset:4". The pattern is anchored at end of line so that a
; longer immediate such as offset:40 or offset:4095 cannot satisfy it.
; The result %ret is never used.
; GCN-LABEL: {{^}}global_atomic_add_f32_off4:
; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4{{$}}
define amdgpu_kernel void @global_atomic_add_f32_off4(float addrspace(1)* %ptr, float %data) {
main_body:
  %p = getelementptr float, float addrspace(1)* %ptr, i64 1
  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %p, float %data)
  ret void
}

; Global f32 atomic add through a pointer moved back by one float element
; (-4 bytes). The test expects the negative displacement to be folded into the
; instruction as "offset:-4". The pattern is anchored at end of line, like the
; packed-half offneg4 test, so that a longer immediate such as offset:-40
; cannot satisfy it. The result %ret is never used.
; GCN-LABEL: {{^}}global_atomic_add_f32_offneg4:
; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:-4{{$}}
define amdgpu_kernel void @global_atomic_add_f32_offneg4(float addrspace(1)* %ptr, float %data) {
main_body:
  %p = getelementptr float, float addrspace(1)* %ptr, i64 -1
  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %p, float %data)
  ret void
}

; Global packed-half atomic add directly through the kernel's pointer
; argument. The result %ret is never used. The expected pattern ends with a
; "$" anchor right after the SGPR pair, so any trailing modifier (such as an
; offset) on the instruction would fail the test.
; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16:
; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @global_atomic_pk_add_v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
main_body:
  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data)
  ret void
}

; Global packed-half atomic add through a pointer advanced by one <2 x half>
; element (+4 bytes). The test expects the displacement to be folded into the
; instruction as "offset:4". The pattern is anchored at end of line, like the
; offneg4 variant of this test, so that a longer immediate such as offset:40
; cannot satisfy it. The result %ret is never used.
; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_off4:
; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4{{$}}
define amdgpu_kernel void @global_atomic_pk_add_v2f16_off4(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
main_body:
  %p = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 1
  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)
  ret void
}

; Global packed-half atomic add through a pointer moved back by one
; <2 x half> element (-4 bytes). The test expects the negative displacement to
; appear as "offset:-4"; the trailing {{$}} anchor rejects any longer
; immediate or extra modifier. The result %ret is never used.
; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_offneg4:
; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:-4{{$}}
define amdgpu_kernel void @global_atomic_pk_add_v2f16_offneg4(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
main_body:
  %p = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 -1
  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)
  ret void
}

; Make sure the instruction still selects when the function is artificially
; given a mismatched subtarget, as long as the required feature is enabled.
; Attribute group #0 sets target-cpu to gfx803 and adds +atomic-fadd-insts
; for this function only. The result %ret is never used.
; GCN-LABEL: {{^}}global_atomic_fadd_f32_wrong_subtarget:
; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @global_atomic_fadd_f32_wrong_subtarget(float addrspace(1)* %ptr, float %data) #0 {
  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data)
  ret void
}

; Attribute group used by @global_atomic_fadd_f32_wrong_subtarget: sets the
; function's target-cpu to gfx803 and enables the atomic-fadd-insts feature.
attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-insts" }
[AMDGPU] gfx908 atomic fadd and atomic pk_fadd Differential Revision: https://reviews.llvm.org/D64435 llvm-svn: 365717 2019-07-11 08:10:17 +08:00			`; RUN: llc < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs \| FileCheck %s -check-prefix=GCN`
[AMDGPU] gfx90a support Differential Revision: https://reviews.llvm.org/D96906 2021-02-18 05:37:46 +08:00			`; RUN: llc < %s -march=amdgcn -mcpu=gfx90a -verify-machineinstrs \| FileCheck %s -check-prefix=GCN`
[AMDGPU] gfx908 atomic fadd and atomic pk_fadd Differential Revision: https://reviews.llvm.org/D64435 llvm-svn: 365717 2019-07-11 08:10:17 +08:00
[AMDGPU] Unify intrinsic ret/nortn interface We have a single noret intrinsic an a lot of special handling around it. Declare it just as any other but do not define rtn instructions itself instead. Differential Revision: https://reviews.llvm.org/D87719 2020-09-11 06:10:52 +08:00			`declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1)`
			`declare <2 x half> @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i1)`
			`declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)*, float)`
			`declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)*, <2 x half>)`
[AMDGPU] gfx908 atomic fadd and atomic pk_fadd Differential Revision: https://reviews.llvm.org/D64435 llvm-svn: 365717 2019-07-11 08:10:17 +08:00
			`; GCN-LABEL: {{^}}buffer_atomic_add_f32:`
			`; GCN: buffer_atomic_add_f32 v0, v1, s[0:3], 0 idxen`
			`define amdgpu_ps void @buffer_atomic_add_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {`
			`main_body:`
[AMDGPU] Unify intrinsic ret/nortn interface We have a single noret intrinsic an a lot of special handling around it. Declare it just as any other but do not define rtn instructions itself instead. Differential Revision: https://reviews.llvm.org/D87719 2020-09-11 06:10:52 +08:00			`%ret = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)`
[AMDGPU] gfx908 atomic fadd and atomic pk_fadd Differential Revision: https://reviews.llvm.org/D64435 llvm-svn: 365717 2019-07-11 08:10:17 +08:00			`ret void`
			`}`

			`; GCN-LABEL: {{^}}buffer_atomic_add_f32_off4_slc:`
			`; GCN: buffer_atomic_add_f32 v0, v1, s[0:3], 0 idxen offset:4 slc`
			`define amdgpu_ps void @buffer_atomic_add_f32_off4_slc(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {`
			`main_body:`
[AMDGPU] Unify intrinsic ret/nortn interface We have a single noret intrinsic an a lot of special handling around it. Declare it just as any other but do not define rtn instructions itself instead. Differential Revision: https://reviews.llvm.org/D87719 2020-09-11 06:10:52 +08:00			`%ret = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)`
[AMDGPU] gfx908 atomic fadd and atomic pk_fadd Differential Revision: https://reviews.llvm.org/D64435 llvm-svn: 365717 2019-07-11 08:10:17 +08:00			`ret void`
			`}`

			`; GCN-LABEL: {{^}}buffer_atomic_pk_add_v2f16:`
			`; GCN: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 idxen`
			`define amdgpu_ps void @buffer_atomic_pk_add_v2f16(<4 x i32> inreg %rsrc, <2 x half> %data, i32 %vindex) {`
			`main_body:`
[AMDGPU] Unify intrinsic ret/nortn interface We have a single noret intrinsic an a lot of special handling around it. Declare it just as any other but do not define rtn instructions itself instead. Differential Revision: https://reviews.llvm.org/D87719 2020-09-11 06:10:52 +08:00			`%ret = call <2 x half> @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)`
[AMDGPU] gfx908 atomic fadd and atomic pk_fadd Differential Revision: https://reviews.llvm.org/D64435 llvm-svn: 365717 2019-07-11 08:10:17 +08:00			`ret void`
			`}`

			`; GCN-LABEL: {{^}}buffer_atomic_pk_add_v2f16_off4_slc:`
			`; GCN: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 idxen offset:4 slc`
			`define amdgpu_ps void @buffer_atomic_pk_add_v2f16_off4_slc(<4 x i32> inreg %rsrc, <2 x half> %data, i32 %vindex) {`
			`main_body:`
[AMDGPU] Unify intrinsic ret/nortn interface We have a single noret intrinsic an a lot of special handling around it. Declare it just as any other but do not define rtn instructions itself instead. Differential Revision: https://reviews.llvm.org/D87719 2020-09-11 06:10:52 +08:00			`%ret = call <2 x half> @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)`
[AMDGPU] gfx908 atomic fadd and atomic pk_fadd Differential Revision: https://reviews.llvm.org/D64435 llvm-svn: 365717 2019-07-11 08:10:17 +08:00			`ret void`
			`}`

			`; GCN-LABEL: {{^}}global_atomic_add_f32:`
AMDGPU: Select global saddr mode from SGPR pointer Use the 64-bit SGPR base with a 0 offset, since it's 1 fewer instruction to materialize the 0 vs. the 64-bit copy. 2020-11-11 00:06:59 +08:00			`; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}`
[AMDGPU] gfx908 atomic fadd and atomic pk_fadd Differential Revision: https://reviews.llvm.org/D64435 llvm-svn: 365717 2019-07-11 08:10:17 +08:00			`define amdgpu_kernel void @global_atomic_add_f32(float addrspace(1)* %ptr, float %data) {`
			`main_body:`
[AMDGPU] Unify intrinsic ret/nortn interface We have a single noret intrinsic an a lot of special handling around it. Declare it just as any other but do not define rtn instructions itself instead. Differential Revision: https://reviews.llvm.org/D87719 2020-09-11 06:10:52 +08:00			`%ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data)`
[AMDGPU] gfx908 atomic fadd and atomic pk_fadd Differential Revision: https://reviews.llvm.org/D64435 llvm-svn: 365717 2019-07-11 08:10:17 +08:00			`ret void`
			`}`

			`; GCN-LABEL: {{^}}global_atomic_add_f32_off4:`
AMDGPU: Select global saddr mode from SGPR pointer Use the 64-bit SGPR base with a 0 offset, since it's 1 fewer instruction to materialize the 0 vs. the 64-bit copy. 2020-11-11 00:06:59 +08:00			`; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4`
[AMDGPU] gfx908 atomic fadd and atomic pk_fadd Differential Revision: https://reviews.llvm.org/D64435 llvm-svn: 365717 2019-07-11 08:10:17 +08:00			`define amdgpu_kernel void @global_atomic_add_f32_off4(float addrspace(1)* %ptr, float %data) {`
			`main_body:`
			`%p = getelementptr float, float addrspace(1)* %ptr, i64 1`
[AMDGPU] Unify intrinsic ret/nortn interface We have a single noret intrinsic an a lot of special handling around it. Declare it just as any other but do not define rtn instructions itself instead. Differential Revision: https://reviews.llvm.org/D87719 2020-09-11 06:10:52 +08:00			`%ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %p, float %data)`
[AMDGPU] gfx908 atomic fadd and atomic pk_fadd Differential Revision: https://reviews.llvm.org/D64435 llvm-svn: 365717 2019-07-11 08:10:17 +08:00			`ret void`
			`}`

AMDGPU: Fix using wrong offsets for global atomic fadd intrinsics Global instructions have the signed offsets. 2020-08-15 10:19:22 +08:00			`; GCN-LABEL: {{^}}global_atomic_add_f32_offneg4:`
AMDGPU: Select global saddr mode from SGPR pointer Use the 64-bit SGPR base with a 0 offset, since it's 1 fewer instruction to materialize the 0 vs. the 64-bit copy. 2020-11-11 00:06:59 +08:00			`; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:-4`
AMDGPU: Fix using wrong offsets for global atomic fadd intrinsics Global instructions have the signed offsets. 2020-08-15 10:19:22 +08:00			`define amdgpu_kernel void @global_atomic_add_f32_offneg4(float addrspace(1)* %ptr, float %data) {`
			`main_body:`
			`%p = getelementptr float, float addrspace(1)* %ptr, i64 -1`
[AMDGPU] Unify intrinsic ret/nortn interface We have a single noret intrinsic an a lot of special handling around it. Declare it just as any other but do not define rtn instructions itself instead. Differential Revision: https://reviews.llvm.org/D87719 2020-09-11 06:10:52 +08:00			`%ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %p, float %data)`
AMDGPU: Fix using wrong offsets for global atomic fadd intrinsics Global instructions have the signed offsets. 2020-08-15 10:19:22 +08:00			`ret void`
			`}`

[AMDGPU] gfx908 atomic fadd and atomic pk_fadd Differential Revision: https://reviews.llvm.org/D64435 llvm-svn: 365717 2019-07-11 08:10:17 +08:00			`; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16:`
AMDGPU: Select global saddr mode from SGPR pointer Use the 64-bit SGPR base with a 0 offset, since it's 1 fewer instruction to materialize the 0 vs. the 64-bit copy. 2020-11-11 00:06:59 +08:00			`; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}`
[AMDGPU] gfx908 atomic fadd and atomic pk_fadd Differential Revision: https://reviews.llvm.org/D64435 llvm-svn: 365717 2019-07-11 08:10:17 +08:00			`define amdgpu_kernel void @global_atomic_pk_add_v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {`
			`main_body:`
[AMDGPU] Unify intrinsic ret/nortn interface We have a single noret intrinsic an a lot of special handling around it. Declare it just as any other but do not define rtn instructions itself instead. Differential Revision: https://reviews.llvm.org/D87719 2020-09-11 06:10:52 +08:00			`%ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data)`
[AMDGPU] gfx908 atomic fadd and atomic pk_fadd Differential Revision: https://reviews.llvm.org/D64435 llvm-svn: 365717 2019-07-11 08:10:17 +08:00			`ret void`
			`}`

			`; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_off4:`
AMDGPU: Select global saddr mode from SGPR pointer Use the 64-bit SGPR base with a 0 offset, since it's 1 fewer instruction to materialize the 0 vs. the 64-bit copy. 2020-11-11 00:06:59 +08:00			`; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4`
[AMDGPU] gfx908 atomic fadd and atomic pk_fadd Differential Revision: https://reviews.llvm.org/D64435 llvm-svn: 365717 2019-07-11 08:10:17 +08:00			`define amdgpu_kernel void @global_atomic_pk_add_v2f16_off4(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {`
			`main_body:`
			`%p = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 1`
[AMDGPU] Unify intrinsic ret/nortn interface We have a single noret intrinsic an a lot of special handling around it. Declare it just as any other but do not define rtn instructions itself instead. Differential Revision: https://reviews.llvm.org/D87719 2020-09-11 06:10:52 +08:00			`%ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)`
[AMDGPU] gfx908 atomic fadd and atomic pk_fadd Differential Revision: https://reviews.llvm.org/D64435 llvm-svn: 365717 2019-07-11 08:10:17 +08:00			`ret void`
			`}`
AMDGPU: Fix overriding global FP atomic feature predicates Global TableGen let override blocks are pretty dangerous and override any local special cases. In this case, the broader HasFlatGlobalInsts was overriding the more specific predicate for FeatureAtomicFaddInsts. Make sure HasFlatGlobalInsts is implied by FeatureAtomicFaddInsts, and make sure the right predicate is used. One issue with independently setting the subtarget features on incompatible targets is all of the encoding families do not define all opcodes. This will hit an assert on gfx10 for example, since we set the encoding independently based on the generation and not based on a feature. 2020-06-05 02:37:39 +08:00
AMDGPU: Fix using wrong offsets for global atomic fadd intrinsics Global instructions have the signed offsets. 2020-08-15 10:19:22 +08:00			`; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_offneg4:`
AMDGPU: Select global saddr mode from SGPR pointer Use the 64-bit SGPR base with a 0 offset, since it's 1 fewer instruction to materialize the 0 vs. the 64-bit copy. 2020-11-11 00:06:59 +08:00			`; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:-4{{$}}`
AMDGPU: Fix using wrong offsets for global atomic fadd intrinsics Global instructions have the signed offsets. 2020-08-15 10:19:22 +08:00			`define amdgpu_kernel void @global_atomic_pk_add_v2f16_offneg4(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {`
			`main_body:`
			`%p = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 -1`
[AMDGPU] Unify intrinsic ret/nortn interface We have a single noret intrinsic an a lot of special handling around it. Declare it just as any other but do not define rtn instructions itself instead. Differential Revision: https://reviews.llvm.org/D87719 2020-09-11 06:10:52 +08:00			`%ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)`
AMDGPU: Fix using wrong offsets for global atomic fadd intrinsics Global instructions have the signed offsets. 2020-08-15 10:19:22 +08:00			`ret void`
			`}`

AMDGPU: Fix overriding global FP atomic feature predicates Global TableGen let override blocks are pretty dangerous and override any local special cases. In this case, the broader HasFlatGlobalInsts was overriding the more specific predicate for FeatureAtomicFaddInsts. Make sure HasFlatGlobalInsts is implied by FeatureAtomicFaddInsts, and make sure the right predicate is used. One issue with independently setting the subtarget features on incompatible targets is all of the encoding families do not define all opcodes. This will hit an assert on gfx10 for example, since we set the encoding independently based on the generation and not based on a feature. 2020-06-05 02:37:39 +08:00			`; Make sure this artificially selects with an incorrect subtarget, but`
			`; the feature set.`
			`; GCN-LABEL: {{^}}global_atomic_fadd_f32_wrong_subtarget:`
AMDGPU: Select global saddr mode from SGPR pointer Use the 64-bit SGPR base with a 0 offset, since it's 1 fewer instruction to materialize the 0 vs. the 64-bit copy. 2020-11-11 00:06:59 +08:00			`; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}`
AMDGPU: Fix overriding global FP atomic feature predicates Global TableGen let override blocks are pretty dangerous and override any local special cases. In this case, the broader HasFlatGlobalInsts was overriding the more specific predicate for FeatureAtomicFaddInsts. Make sure HasFlatGlobalInsts is implied by FeatureAtomicFaddInsts, and make sure the right predicate is used. One issue with independently setting the subtarget features on incompatible targets is all of the encoding families do not define all opcodes. This will hit an assert on gfx10 for example, since we set the encoding independently based on the generation and not based on a feature. 2020-06-05 02:37:39 +08:00			`define amdgpu_kernel void @global_atomic_fadd_f32_wrong_subtarget(float addrspace(1)* %ptr, float %data) #0 {`
[AMDGPU] Unify intrinsic ret/nortn interface We have a single noret intrinsic an a lot of special handling around it. Declare it just as any other but do not define rtn instructions itself instead. Differential Revision: https://reviews.llvm.org/D87719 2020-09-11 06:10:52 +08:00			`%ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data)`
AMDGPU: Fix overriding global FP atomic feature predicates Global TableGen let override blocks are pretty dangerous and override any local special cases. In this case, the broader HasFlatGlobalInsts was overriding the more specific predicate for FeatureAtomicFaddInsts. Make sure HasFlatGlobalInsts is implied by FeatureAtomicFaddInsts, and make sure the right predicate is used. One issue with independently setting the subtarget features on incompatible targets is all of the encoding families do not define all opcodes. This will hit an assert on gfx10 for example, since we set the encoding independently based on the generation and not based on a feature. 2020-06-05 02:37:39 +08:00			`ret void`
			`}`

			`attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-insts" }`