# Source path (truncated in the listing this was copied from):
#   llvm-project/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround...
#
# Listing metadata: 172 lines, 5.7 KiB, Plaintext (Raw Normal View History)

# RUN: llc -run-pass si-insert-waits -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s
--- |
  ; LLVM IR module referenced by the two machine functions in this file.
  ; The MIR bodies name these IR blocks (entry / if / else / done) and the
  ; %out argument (as %ir.out), so those names must stay in sync.

  ; Branches on an ordered-equal compare of %cond against 0.0. Each arm does a
  ; volatile store to an undef address; %done then stores 0 (coming from %if)
  ; or 1 (coming from %else) to %out.
  define amdgpu_kernel void @vccz_corrupt_workaround(float %cond, i32 addrspace(1)* %out) #0 {
  entry:
    %cmp0 = fcmp oeq float %cond, 0.000000e+00
    br i1 %cmp0, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0

  else:                                             ; preds = %entry
    store volatile i32 100, i32 addrspace(1)* undef
    br label %done, !structurizecfg.uniform !0

  if:                                               ; preds = %entry
    store volatile i32 9, i32 addrspace(1)* undef
    br label %done, !structurizecfg.uniform !0

  done:                                             ; preds = %if, %else
    %value = phi i32 [ 0, %if ], [ 1, %else ]
    store i32 %value, i32 addrspace(1)* %out
    ret void
  }

  ; Same control flow as above, but the branch condition is undef, so %cond is
  ; never read. The signature is kept identical to the first kernel.
  define amdgpu_kernel void @vccz_corrupt_undef_vcc(float %cond, i32 addrspace(1)* %out) #0 {
  entry:
    br i1 undef, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0

  else:                                             ; preds = %entry
    store volatile i32 100, i32 addrspace(1)* undef
    br label %done, !structurizecfg.uniform !0

  if:                                               ; preds = %entry
    store volatile i32 9, i32 addrspace(1)* undef
    br label %done, !structurizecfg.uniform !0

  done:                                             ; preds = %if, %else
    %value = phi i32 [ 0, %if ], [ 1, %else ]
    store i32 %value, i32 addrspace(1)* %out
    ret void
  }

  ; Both kernels use attribute group #0; no other group is referenced.
  attributes #0 = { nounwind }

  ; Empty metadata node used as the operand of the uniform-branch annotations.
  !0 = !{}

...
---
# Machine function for @vccz_corrupt_workaround.
#
# The FileCheck directives below require that, in the pass output, the line
# directly after the V_CMP_EQ_F32 that defines %vcc is `%vcc = S_MOV_B64 %vcc`,
# and that the S_CBRANCH_VCCZ reading %vcc comes directly after that copy.
#
# NOTE(review): the expected branch target is %bb.2 although the input branches
# to %bb.1 (bb.1.else, which is third in layout order below) -- presumably the
# blocks are renumbered in layout order when the MIR is printed; confirm.
# CHECK-LABEL: name: vccz_corrupt_workaround
# CHECK: %vcc = V_CMP_EQ_F32
# CHECK-NEXT: %vcc = S_MOV_B64 %vcc
# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit killed %vcc

name:            vccz_corrupt_workaround
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '%sgpr0_sgpr1' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.entry:
    liveins: %sgpr0_sgpr1

    ; Two scalar loads through %sgpr0_sgpr1 come first; the second one
    ; overwrites (and kills) the pointer pair it loads through.
    %sgpr2 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 9, 0 :: (non-temporal dereferenceable invariant load 4 from `float addrspace(2)* undef`)
    %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    ; %vcc is defined by the compare (which reads the loaded %sgpr2) and is
    ; consumed, with a kill flag, by the conditional branch on the next line.
    %vcc = V_CMP_EQ_F32_e64 0, 0, 0, %sgpr2, 0, implicit %exec
    S_CBRANCH_VCCZ %bb.1, implicit killed %vcc

  bb.2.if:
    liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    %vgpr0 = V_MOV_B32_e32 9, implicit %exec
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
    %vgpr0 = V_MOV_B32_e32 0, implicit %exec
    S_BRANCH %bb.3

  bb.1.else:
    liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    ; This block has no terminator; bb.3.done is the next block in layout.
    %vgpr0 = V_MOV_B32_e32 100, implicit %exec
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
    %vgpr0 = V_MOV_B32_e32 1, implicit %exec

  bb.3.done:
    liveins: %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    ; Stores the %vgpr0 value set by whichever arm ran (0 from bb.2.if,
    ; 1 from bb.1.else) through the descriptor in %sgpr0..%sgpr3.
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.out)
    S_ENDPGM

...
---
# Machine function for @vccz_corrupt_undef_vcc.
#
# Unlike the first function there is no compare: the conditional branch reads
# %vcc with an `undef` flag. The FileCheck directives below require that the
# pass output contain an S_WAITCNT whose very next line is the S_CBRANCH_VCCZ,
# still carrying `implicit undef %vcc`.
#
# NOTE(review): the expected branch target is %bb.2 although the input branches
# to %bb.1 (bb.1.else, which is third in layout order below) -- presumably the
# blocks are renumbered in layout order when the MIR is printed; confirm.
# CHECK-LABEL: name: vccz_corrupt_undef_vcc
# CHECK: S_WAITCNT
# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit undef %vcc

name:            vccz_corrupt_undef_vcc
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '%sgpr0_sgpr1' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.entry:
    liveins: %sgpr0_sgpr1

    ; One scalar load through %sgpr0_sgpr1 precedes the branch; nothing in
    ; this block writes %vcc before the branch reads it.
    %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    S_CBRANCH_VCCZ %bb.1, implicit undef %vcc

  bb.2.if:
    liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    %vgpr0 = V_MOV_B32_e32 9, implicit %exec
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
    %vgpr0 = V_MOV_B32_e32 0, implicit %exec
    S_BRANCH %bb.3

  bb.1.else:
    liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    ; This block has no terminator; bb.3.done is the next block in layout.
    %vgpr0 = V_MOV_B32_e32 100, implicit %exec
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
    %vgpr0 = V_MOV_B32_e32 1, implicit %exec

  bb.3.done:
    liveins: %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    ; Stores the %vgpr0 value set by whichever arm ran (0 from bb.2.if,
    ; 1 from bb.1.else) through the descriptor in %sgpr0..%sgpr3.
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.out)
    S_ENDPGM

...