# Provenance (page-header residue): forked from OSchip/llvm-project; YAML, 172 lines, 5.7 KiB.
# RUN: llc -run-pass si-insert-waits -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s
--- |
  ; LLVM IR module backing the machine functions in this file. The MIR
  ; documents that follow refer to these functions by name, to their basic
  ; blocks (entry / if / else / done) and to the %out argument (as %ir.out).

  ; Branches on a float compare of %cond against 0.0 (branch is tagged
  ; !amdgpu.uniform). Each arm performs one volatile store, and the join block
  ; stores 0 (coming from %if) or 1 (coming from %else) to %out.
  define amdgpu_kernel void @vccz_corrupt_workaround(float %cond, i32 addrspace(1)* %out) #0 {
  entry:
    %cmp0 = fcmp oeq float %cond, 0.000000e+00
    br i1 %cmp0, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0

  else:                                             ; preds = %entry
    store volatile i32 100, i32 addrspace(1)* undef
    br label %done, !structurizecfg.uniform !0

  if:                                               ; preds = %entry
    store volatile i32 9, i32 addrspace(1)* undef
    br label %done, !structurizecfg.uniform !0

  done:                                             ; preds = %if, %else
    %value = phi i32 [ 0, %if ], [ 1, %else ]
    store i32 %value, i32 addrspace(1)* %out
    ret void
  }

  ; Same control-flow shape as the function above, but the branch condition is
  ; undef instead of the result of a compare.
  define amdgpu_kernel void @vccz_corrupt_undef_vcc(float %cond, i32 addrspace(1)* %out) #0 {
  entry:
    br i1 undef, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0

  else:                                             ; preds = %entry
    store volatile i32 100, i32 addrspace(1)* undef
    br label %done, !structurizecfg.uniform !0

  if:                                               ; preds = %entry
    store volatile i32 9, i32 addrspace(1)* undef
    br label %done, !structurizecfg.uniform !0

  done:                                             ; preds = %if, %else
    %value = phi i32 [ 0, %if ], [ 1, %else ]
    store i32 %value, i32 addrspace(1)* %out
    ret void
  }

  ; #0 is attached to both kernels; #1 is not referenced by either of them.
  attributes #0 = { nounwind }
  attributes #1 = { readnone }

  ; Empty node used as the operand of the uniform-branch metadata above.
  !0 = !{}

...
---
# Expected output for this function: immediately after the V_CMP_EQ_F32 that
# defines %vcc, the pass must have inserted `%vcc = S_MOV_B64 %vcc`, and the
# S_CBRANCH_VCCZ that consumes %vcc must be the very next instruction.
# NOTE(review): judging by the function name this guards a workaround for a
# vccz-corruption issue on the selected CPU; the hardware condition itself is
# not visible in this file -- confirm against the pass source.
# NOTE(review): the branch target is expected as %bb.2.else although the input
# labels that block bb.1.else; presumably blocks are renumbered in layout order
# when the MIR is parsed and printed again -- confirm against the MIR docs.
# CHECK-LABEL: name: vccz_corrupt_workaround
# CHECK: %vcc = V_CMP_EQ_F32
# CHECK-NEXT: %vcc = S_MOV_B64 %vcc
# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2.else, implicit killed %vcc

name:            vccz_corrupt_workaround
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '%sgpr0_sgpr1' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.entry:
    liveins: %sgpr0_sgpr1

    ; Two scalar loads are issued first; %vcc is then written by a vector
    ; compare and consumed (killed) by the conditional branch right after it.
    %sgpr2 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 9, 0 :: (non-temporal dereferenceable invariant load 4 from `float addrspace(2)* undef`)
    %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vcc = V_CMP_EQ_F32_e64 0, 0, 0, %sgpr2, 0, implicit %exec
    S_CBRANCH_VCCZ %bb.1.else, implicit killed %vcc

  bb.2.if:
    liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    ; Volatile store of 9, then %vgpr0 = 0 is the value handed to bb.3.done.
    %vgpr0 = V_MOV_B32_e32 9, implicit %exec
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
    %vgpr0 = V_MOV_B32_e32 0, implicit %exec
    S_BRANCH %bb.3.done

  bb.1.else:
    liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    ; Volatile store of 100, then %vgpr0 = 1; no terminator, so control falls
    ; through into the block that follows.
    %vgpr0 = V_MOV_B32_e32 100, implicit %exec
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
    %vgpr0 = V_MOV_B32_e32 1, implicit %exec

  bb.3.done:
    liveins: %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    ; Fill in %sgpr2/%sgpr3 next to the loaded %sgpr0_sgpr1, then store the
    ; value selected by the taken arm to %ir.out.
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.out)
    S_ENDPGM

...
---
# Expected output for this function: an S_WAITCNT is emitted and the
# S_CBRANCH_VCCZ, still reading %vcc as `implicit undef`, is the very next
# instruction -- i.e. no S_MOV_B64 of %vcc appears between the two.
# NOTE(review): the branch target is expected as %bb.2.else although the input
# labels that block bb.1.else; presumably blocks are renumbered in layout order
# when the MIR is parsed and printed again -- confirm against the MIR docs.
# CHECK-LABEL: name: vccz_corrupt_undef_vcc
# CHECK: S_WAITCNT
# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2.else, implicit undef %vcc

name:            vccz_corrupt_undef_vcc
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
liveins:
  - { reg: '%sgpr0_sgpr1' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.entry:
    liveins: %sgpr0_sgpr1

    ; One scalar load is issued; %vcc is never defined in this function, so
    ; the conditional branch reads it as undef.
    %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    S_CBRANCH_VCCZ %bb.1.else, implicit undef %vcc

  bb.2.if:
    liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    ; Volatile store of 9, then %vgpr0 = 0 is the value handed to bb.3.done.
    %vgpr0 = V_MOV_B32_e32 9, implicit %exec
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
    %vgpr0 = V_MOV_B32_e32 0, implicit %exec
    S_BRANCH %bb.3.done

  bb.1.else:
    liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    ; Volatile store of 100, then %vgpr0 = 1; no terminator, so control falls
    ; through into the block that follows.
    %vgpr0 = V_MOV_B32_e32 100, implicit %exec
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
    %vgpr0 = V_MOV_B32_e32 1, implicit %exec

  bb.3.done:
    liveins: %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    ; Fill in %sgpr2/%sgpr3 next to the loaded %sgpr0_sgpr1, then store the
    ; value selected by the taken arm to %ir.out.
    %sgpr3 = S_MOV_B32 61440
    %sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.out)
    S_ENDPGM

...