forked from OSchip/llvm-project
AMDGPU/GlobalISel: Stop handling llvm.amdgcn.buffer.atomic.fadd
This code is not structured to handle the legacy buffer intrinsics and was miscompiling them.
This commit is contained in:
parent
8ff3c9e0be
commit
be7e938e27
|
@ -4167,7 +4167,6 @@ static unsigned getBufferAtomicPseudo(Intrinsic::ID IntrID) {
|
|||
case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap:
|
||||
case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap:
|
||||
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP;
|
||||
case Intrinsic::amdgcn_buffer_atomic_fadd:
|
||||
case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
|
||||
case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
|
||||
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD;
|
||||
|
@ -5186,7 +5185,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
|
|||
case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
|
||||
case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap:
|
||||
case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap:
|
||||
case Intrinsic::amdgcn_buffer_atomic_fadd:
|
||||
case Intrinsic::amdgcn_raw_buffer_atomic_fmin:
|
||||
case Intrinsic::amdgcn_struct_buffer_atomic_fmin:
|
||||
case Intrinsic::amdgcn_raw_buffer_atomic_fmax:
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX90A
|
||||
|
||||
declare double @llvm.amdgcn.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i1)
|
||||
declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
|
||||
declare double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32 immarg)
|
||||
declare double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
|
||||
|
@ -16,56 +15,6 @@ declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double
|
|||
declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data)
|
||||
declare double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* nocapture, double, i32, i32, i1)
|
||||
|
||||
define amdgpu_kernel void @buffer_atomic_add_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
|
||||
; GFX90A-LABEL: buffer_atomic_add_noret_f64:
|
||||
; GFX90A: ; %bb.0: ; %main_body
|
||||
; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
|
||||
; GFX90A-NEXT: s_load_dword s8, s[0:1], 0x3c
|
||||
; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, s8
|
||||
; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 offen glc
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
main_body:
|
||||
%ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
|
||||
; GFX90A-LABEL: buffer_atomic_add_rtn_f64:
|
||||
; GFX90A: ; %bb.0: ; %main_body
|
||||
; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen glc
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
main_body:
|
||||
%ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
|
||||
store double %ret, double* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) {
|
||||
; GFX90A-LABEL: buffer_atomic_add_rtn_f64_off4_slc:
|
||||
; GFX90A: ; %bb.0: ; %main_body
|
||||
; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
|
||||
; GFX90A-NEXT: s_load_dword s10, s[0:1], 0x3c
|
||||
; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
|
||||
; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x44
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, s10
|
||||
; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[4:7], 4 offen glc slc
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
main_body:
|
||||
%ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)
|
||||
store double %ret, double addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
|
||||
; GFX90A-LABEL: raw_buffer_atomic_add_noret_f64:
|
||||
; GFX90A: ; %bb.0: ; %main_body
|
||||
|
@ -418,7 +367,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(double addrspace(1)*
|
|||
; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
|
||||
; GFX90A-NEXT: .LBB24_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB21_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
|
@ -431,7 +380,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(double addrspace(1)*
|
|||
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB24_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB21_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
main_body:
|
||||
|
@ -466,7 +415,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrsp
|
|||
; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
|
||||
; GFX90A-NEXT: .LBB26_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB23_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
|
||||
; GFX90A-NEXT: buffer_wbl2
|
||||
|
@ -479,7 +428,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrsp
|
|||
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB26_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB23_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
main_body:
|
||||
|
@ -522,7 +471,7 @@ define double @global_atomic_fadd_f64_rtn_pat(double addrspace(1)* %ptr, double
|
|||
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
|
||||
; GFX90A-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX90A-NEXT: .LBB29_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB26_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
|
||||
|
@ -536,7 +485,7 @@ define double @global_atomic_fadd_f64_rtn_pat(double addrspace(1)* %ptr, double
|
|||
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
|
||||
; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB29_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB26_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
|
||||
|
@ -569,7 +518,7 @@ define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr,
|
|||
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
|
||||
; GFX90A-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX90A-NEXT: .LBB31_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB28_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
|
||||
|
@ -583,7 +532,7 @@ define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr,
|
|||
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
|
||||
; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB31_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB28_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
|
||||
|
@ -628,7 +577,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(double ad
|
|||
; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
|
||||
; GFX90A-NEXT: .LBB34_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB31_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
|
@ -639,7 +588,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(double ad
|
|||
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB34_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB31_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
main_body:
|
||||
|
@ -655,7 +604,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(double* %ptr) #1 {
|
|||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
|
||||
; GFX90A-NEXT: .LBB35_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB32_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
|
||||
|
@ -670,7 +619,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(double* %ptr) #1 {
|
|||
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB35_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB32_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
main_body:
|
||||
|
@ -704,7 +653,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) #
|
|||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
|
||||
; GFX90A-NEXT: .LBB37_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB34_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
|
||||
|
@ -720,7 +669,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) #
|
|||
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB37_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB34_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
main_body:
|
||||
|
@ -734,7 +683,7 @@ define double @flat_atomic_fadd_f64_rtn_pat(double* %ptr) #1 {
|
|||
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
|
||||
; GFX90A-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX90A-NEXT: .LBB38_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB35_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
|
||||
|
@ -748,7 +697,7 @@ define double @flat_atomic_fadd_f64_rtn_pat(double* %ptr) #1 {
|
|||
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
|
||||
; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB38_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB35_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
|
||||
|
@ -781,7 +730,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 {
|
|||
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
|
||||
; GFX90A-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX90A-NEXT: .LBB40_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB37_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
|
||||
|
@ -796,7 +745,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 {
|
|||
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
|
||||
; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB40_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB37_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
|
||||
|
@ -841,7 +790,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(double* %pt
|
|||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
|
||||
; GFX90A-NEXT: .LBB43_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB40_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
|
||||
|
@ -854,7 +803,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(double* %pt
|
|||
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB43_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB40_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
main_body:
|
||||
|
@ -985,7 +934,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(double add
|
|||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX90A-NEXT: ds_read_b64 v[0:1], v0
|
||||
; GFX90A-NEXT: .LBB52_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: .LBB49_1: ; %atomicrmw.start
|
||||
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: v_add_f64 v[2:3], v[0:1], 4.0
|
||||
|
@ -997,7 +946,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(double add
|
|||
; GFX90A-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
|
||||
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[0,1]
|
||||
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[0:1]
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB52_1
|
||||
; GFX90A-NEXT: s_cbranch_execnz .LBB49_1
|
||||
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
main_body:
|
||||
|
|
Loading…
Reference in New Issue