forked from OSchip/llvm-project
172 lines
5.6 KiB
YAML
172 lines
5.6 KiB
YAML
# RUN: llc -run-pass si-insert-waitcnts -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s
|
|
--- |
|
|
|
|
; IR kernel backing the machine function of the same name.
; Compares %cond with 0.0 and branches to %if or %else; each arm performs a
; volatile store (9 or 100) to an undef address, and %done stores the phi
; result (0 from %if, 1 from %else) to %out.
define amdgpu_kernel void @vccz_corrupt_workaround(float %cond, i32 addrspace(1)* %out) #0 {
|
|
entry:
|
|
%cmp0 = fcmp oeq float %cond, 0.000000e+00
|
|
; The branch carries both !structurizecfg.uniform and !amdgpu.uniform annotations.
br i1 %cmp0, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0
|
|
|
|
else: ; preds = %entry
|
|
; Volatile store of the constant 100; the destination pointer is undef.
store volatile i32 100, i32 addrspace(1)* undef
|
|
br label %done, !structurizecfg.uniform !0
|
|
|
|
if: ; preds = %entry
|
|
; Volatile store of the constant 9; the destination pointer is undef.
store volatile i32 9, i32 addrspace(1)* undef
|
|
br label %done, !structurizecfg.uniform !0
|
|
|
|
done: ; preds = %if, %else
|
|
; Records which arm was taken: 0 when coming from %if, 1 when coming from %else.
%value = phi i32 [ 0, %if ], [ 1, %else ]
|
|
store i32 %value, i32 addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; IR kernel backing the machine function of the same name.
; Same control-flow shape as a two-armed if/else joined at %done, except that
; the entry branch condition is the constant undef instead of a computed compare.
define amdgpu_kernel void @vccz_corrupt_undef_vcc(float %cond, i32 addrspace(1)* %out) #0 {
|
|
entry:
|
|
; No compare is emitted here: the i1 condition is undef. %cond is not used in this body.
br i1 undef, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0
|
|
|
|
else: ; preds = %entry
|
|
; Volatile store of the constant 100; the destination pointer is undef.
store volatile i32 100, i32 addrspace(1)* undef
|
|
br label %done, !structurizecfg.uniform !0
|
|
|
|
if: ; preds = %entry
|
|
; Volatile store of the constant 9; the destination pointer is undef.
store volatile i32 9, i32 addrspace(1)* undef
|
|
br label %done, !structurizecfg.uniform !0
|
|
|
|
done: ; preds = %if, %else
|
|
; Records which arm was taken: 0 when coming from %if, 1 when coming from %else.
%value = phi i32 [ 0, %if ], [ 1, %else ]
|
|
store i32 %value, i32 addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; Attribute group #0 is the one attached to both kernel definitions above.
attributes #0 = { nounwind }
|
|
; NOTE(review): no definition visible in this file references #1 - confirm it is intentionally kept.
attributes #1 = { readnone }
|
|
|
|
; Empty metadata node used as the operand of the uniformity annotations on the branches.
!0 = !{}
|
|
|
|
...
|
|
---
|
|
# Machine function fed to the si-insert-waitcnts pass by the llc invocation at
# the top of this file. The expectations below require the pass output to have,
# directly after the V_CMP_EQ_F32 that defines $vcc: an S_WAITCNT 0, then a
# self-move of $vcc through S_MOV_B64, then the S_CBRANCH_VCCZ.
# NOTE(review): presumably this is the vccz-corruption workaround the test name
# refers to; the conditions that trigger it are not visible here - confirm
# against the pass implementation.
# NOTE(review): the expected branch target is %bb.2 while the input below says
# %bb.1; the blocks appear in the order entry, if, else, done, so presumably
# they are renumbered by position when parsed - confirm.
# CHECK-LABEL: name: vccz_corrupt_workaround
|
|
# CHECK: $vcc = V_CMP_EQ_F32
|
|
# CHECK-NEXT: S_WAITCNT 0
|
|
# CHECK-NEXT: $vcc = S_MOV_B64 $vcc
|
|
# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit killed $vcc
|
|
|
|
name: vccz_corrupt_workaround
|
|
alignment: 0
|
|
exposesReturnsTwice: false
|
|
legalized: false
|
|
regBankSelected: false
|
|
selected: false
|
|
tracksRegLiveness: true
|
|
# The only function-level live-in is the 64-bit pair $sgpr0_sgpr1.
liveins:
|
|
- { reg: '$sgpr0_sgpr1' }
|
|
# Frame description: every size and offset is zero and every flag is false.
frameInfo:
|
|
isFrameAddressTaken: false
|
|
isReturnAddressTaken: false
|
|
hasStackMap: false
|
|
hasPatchPoint: false
|
|
stackSize: 0
|
|
offsetAdjustment: 0
|
|
maxAlignment: 0
|
|
adjustsStack: false
|
|
hasCalls: false
|
|
maxCallFrameSize: 0
|
|
hasOpaqueSPAdjustment: false
|
|
hasVAStart: false
|
|
hasMustTailInVarArgFunc: false
|
|
body: |
|
|
bb.0.entry:
|
|
liveins: $sgpr0_sgpr1, $vcc
|
|
|
|
; Scalar load that overwrites $sgpr0_sgpr1; the input contains no wait between it and the branch below.
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
|
|
$sgpr7 = S_MOV_B32 61440
|
|
$sgpr6 = S_MOV_B32 -1
|
|
; Compare defines $vcc; the conditional branch right after it reads (and kills) $vcc.
$vcc = V_CMP_EQ_F32_e64 0, 0, 0, undef $sgpr2, 0, implicit $exec
|
|
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
|
|
|
bb.2.if:
|
|
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
|
|
|
; Store the constant 9, then leave 0 in $vgpr0 for the final store in bb.3.done.
$vgpr0 = V_MOV_B32_e32 9, implicit $exec
|
|
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
|
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
|
S_BRANCH %bb.3
|
|
|
|
bb.1.else:
|
|
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
|
|
|
; Store the constant 100, then leave 1 in $vgpr0. This block has no terminator and is followed directly by bb.3.done.
$vgpr0 = V_MOV_B32_e32 100, implicit $exec
|
|
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
|
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
|
|
|
bb.3.done:
|
|
liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
|
|
|
; Fill $sgpr2/$sgpr3, then store the $vgpr0 value chosen by the taken arm to %ir.out.
$sgpr3 = S_MOV_B32 61440
|
|
$sgpr2 = S_MOV_B32 -1
|
|
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
|
|
S_ENDPGM
|
|
|
|
...
|
|
---
|
|
# Variant in which the conditional branch reads an undef $vcc and no instruction
# in the entry block defines $vcc. The expectations below only require an
# S_WAITCNT 3855 directly followed by a V_MOV_B32_e32 into $vgpr0.
# NOTE(review): presumably the intent is that no $vcc self-move is inserted when
# $vcc is undef, but no negative-match directive enforces that here - confirm
# whether one should be added.
# NOTE(review): 3855 is 0xF0F; which counters that encodes depends on the
# target's s_waitcnt field layout, which is not shown in this file.
# CHECK-LABEL: name: vccz_corrupt_undef_vcc
|
|
# CHECK: S_WAITCNT 3855
|
|
# CHECK-NEXT: $vgpr0 = V_MOV_B32_e32
|
|
|
|
name: vccz_corrupt_undef_vcc
|
|
alignment: 0
|
|
exposesReturnsTwice: false
|
|
legalized: false
|
|
regBankSelected: false
|
|
selected: false
|
|
tracksRegLiveness: true
|
|
# The only function-level live-in is the 64-bit pair $sgpr0_sgpr1.
liveins:
|
|
- { reg: '$sgpr0_sgpr1' }
|
|
# Frame description: every size and offset is zero and every flag is false.
frameInfo:
|
|
isFrameAddressTaken: false
|
|
isReturnAddressTaken: false
|
|
hasStackMap: false
|
|
hasPatchPoint: false
|
|
stackSize: 0
|
|
offsetAdjustment: 0
|
|
maxAlignment: 0
|
|
adjustsStack: false
|
|
hasCalls: false
|
|
maxCallFrameSize: 0
|
|
hasOpaqueSPAdjustment: false
|
|
hasVAStart: false
|
|
hasMustTailInVarArgFunc: false
|
|
body: |
|
|
bb.0.entry:
|
|
liveins: $sgpr0_sgpr1
|
|
|
|
; Scalar load that overwrites $sgpr0_sgpr1; the input contains no wait between it and the branch below.
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
|
|
$sgpr7 = S_MOV_B32 61440
|
|
$sgpr6 = S_MOV_B32 -1
|
|
; The branch reads $vcc marked undef; nothing earlier in this block writes $vcc.
S_CBRANCH_VCCZ %bb.1, implicit undef $vcc
|
|
|
|
bb.2.if:
|
|
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
|
|
|
; Store the constant 9, then leave 0 in $vgpr0 for the final store in bb.3.done.
$vgpr0 = V_MOV_B32_e32 9, implicit $exec
|
|
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
|
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
|
S_BRANCH %bb.3
|
|
|
|
bb.1.else:
|
|
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
|
|
|
; Store the constant 100, then leave 1 in $vgpr0. This block has no terminator and is followed directly by bb.3.done.
$vgpr0 = V_MOV_B32_e32 100, implicit $exec
|
|
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
|
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
|
|
|
bb.3.done:
|
|
liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
|
|
|
; Fill $sgpr2/$sgpr3, then store the $vgpr0 value chosen by the taken arm to %ir.out.
$sgpr3 = S_MOV_B32 61440
|
|
$sgpr2 = S_MOV_B32 -1
|
|
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
|
|
S_ENDPGM
|
|
|
|
...
|