[AMDGPU] Fix whole wavefront mode

We cannot move WWM over copies of the exec register, because such a copy must read the exact exec mask.

Differential Revision: https://reviews.llvm.org/D76232
This commit is contained in:
Sebastian Neubauer 2020-03-16 14:33:32 +01:00
parent 1f93b162fc
commit 6e29846b29
3 changed files with 64 additions and 61 deletions

View File

@ -171,8 +171,6 @@ private:
void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
char analyzeFunction(MachineFunction &MF);
bool requiresCorrectState(const MachineInstr &MI) const;
MachineBasicBlock::iterator saveSCC(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Before);
MachineBasicBlock::iterator
@ -526,36 +524,6 @@ char SIWholeQuadMode::analyzeFunction(MachineFunction &MF) {
return GlobalFlags;
}
/// Whether \p MI really requires the exec state computed during analysis.
///
/// Scalar instructions must occasionally be marked WQM for correct propagation
/// (e.g. thread masks leading up to branches), but when it comes to actual
/// execution, they don't care about EXEC -- unless they read EXEC themselves.
///
/// A copy of EXEC into an SGPR is such a case: it has an SGPR destination, yet
/// the value it captures is the exec mask, so it must execute under the exact
/// state and a WWM region must not be placed around it.
///
/// \returns true if \p MI is a terminator or may read EXEC.
bool SIWholeQuadMode::requiresCorrectState(const MachineInstr &MI) const {
  // Terminators always need the state that was computed for them.
  if (MI.isTerminator())
    return true;

  // Defer to the target hook instead of classifying by destination register
  // class: looking only at the destination wrongly treats `COPY $exec` into an
  // SGPR as independent of EXEC.
  return TII->mayReadEXEC(*MRI, MI);
}
MachineBasicBlock::iterator
SIWholeQuadMode::saveSCC(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Before) {
@ -742,7 +710,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
if (II != IE) {
MachineInstr &MI = *II;
if (requiresCorrectState(MI)) {
if (MI.isTerminator() || TII->mayReadEXEC(*MRI, MI)) {
auto III = Instructions.find(&MI);
if (III != Instructions.end()) {
if (III->second.Needs & StateWWM)

View File

@ -375,9 +375,9 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
;
; GFX8-LABEL: add_i32_varying:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v2, v0
; GFX8-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: s_mov_b64 exec, s[2:3]
; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -428,9 +428,9 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
;
; GFX9-LABEL: add_i32_varying:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b64 exec, s[2:3]
; GFX9-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -480,9 +480,9 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1064-LABEL: add_i32_varying:
; GFX1064: ; %bb.0: ; %entry
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v2, v0
; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: s_mov_b64 exec, s[2:3]
; GFX1064-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -539,10 +539,10 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1032-LABEL: add_i32_varying:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: ; implicit-def: $vcc_hi
; GFX1032-NEXT: v_mov_b32_e32 v2, v0
; GFX1032-NEXT: s_or_saveexec_b32 s2, -1
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: s_mov_b32 exec_lo, s2
; GFX1032-NEXT: v_cmp_ne_u32_e64 s2, 1, 0
@ -614,9 +614,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
;
; GFX8-LABEL: add_i32_varying_gfx1032:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v2, v0
; GFX8-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: s_mov_b64 exec, s[2:3]
; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -667,9 +667,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
;
; GFX9-LABEL: add_i32_varying_gfx1032:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b64 exec, s[2:3]
; GFX9-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -719,9 +719,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
;
; GFX1064-LABEL: add_i32_varying_gfx1032:
; GFX1064: ; %bb.0: ; %entry
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v2, v0
; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: s_mov_b64 exec, s[2:3]
; GFX1064-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -778,10 +778,10 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
;
; GFX1032-LABEL: add_i32_varying_gfx1032:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: ; implicit-def: $vcc_hi
; GFX1032-NEXT: v_mov_b32_e32 v2, v0
; GFX1032-NEXT: s_or_saveexec_b32 s2, -1
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: s_mov_b32 exec_lo, s2
; GFX1032-NEXT: v_cmp_ne_u32_e64 s2, 1, 0
@ -853,9 +853,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
;
; GFX8-LABEL: add_i32_varying_gfx1064:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v2, v0
; GFX8-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: s_mov_b64 exec, s[2:3]
; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -906,9 +906,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
;
; GFX9-LABEL: add_i32_varying_gfx1064:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b64 exec, s[2:3]
; GFX9-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -958,9 +958,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
;
; GFX1064-LABEL: add_i32_varying_gfx1064:
; GFX1064: ; %bb.0: ; %entry
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v2, v0
; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: s_mov_b64 exec, s[2:3]
; GFX1064-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -1017,10 +1017,10 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
;
; GFX1032-LABEL: add_i32_varying_gfx1064:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: ; implicit-def: $vcc_hi
; GFX1032-NEXT: v_mov_b32_e32 v2, v0
; GFX1032-NEXT: s_or_saveexec_b32 s2, -1
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: s_mov_b32 exec_lo, s2
; GFX1032-NEXT: v_cmp_ne_u32_e64 s2, 1, 0
@ -1934,9 +1934,9 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
;
; GFX8-LABEL: sub_i32_varying:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v2, v0
; GFX8-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: s_mov_b64 exec, s[2:3]
; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -1987,9 +1987,9 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
;
; GFX9-LABEL: sub_i32_varying:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b64 exec, s[2:3]
; GFX9-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -2039,9 +2039,9 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1064-LABEL: sub_i32_varying:
; GFX1064: ; %bb.0: ; %entry
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v2, v0
; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: s_mov_b64 exec, s[2:3]
; GFX1064-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -2098,10 +2098,10 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1032-LABEL: sub_i32_varying:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: ; implicit-def: $vcc_hi
; GFX1032-NEXT: v_mov_b32_e32 v2, v0
; GFX1032-NEXT: s_or_saveexec_b32 s2, -1
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: s_mov_b32 exec_lo, s2
; GFX1032-NEXT: v_cmp_ne_u32_e64 s2, 1, 0
@ -2917,9 +2917,9 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
;
; GFX8-LABEL: or_i32_varying:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v2, v0
; GFX8-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: s_mov_b64 exec, s[2:3]
; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -2970,9 +2970,9 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
;
; GFX9-LABEL: or_i32_varying:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b64 exec, s[2:3]
; GFX9-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -3022,9 +3022,9 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1064-LABEL: or_i32_varying:
; GFX1064: ; %bb.0: ; %entry
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v2, v0
; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: s_mov_b64 exec, s[2:3]
; GFX1064-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -3081,10 +3081,10 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1032-LABEL: or_i32_varying:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: ; implicit-def: $vcc_hi
; GFX1032-NEXT: v_mov_b32_e32 v2, v0
; GFX1032-NEXT: s_or_saveexec_b32 s2, -1
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: s_mov_b32 exec_lo, s2
; GFX1032-NEXT: v_cmp_ne_u32_e64 s2, 1, 0
@ -3159,9 +3159,9 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
;
; GFX8-LABEL: xor_i32_varying:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v2, v0
; GFX8-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: s_mov_b64 exec, s[2:3]
; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -3212,9 +3212,9 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
;
; GFX9-LABEL: xor_i32_varying:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b64 exec, s[2:3]
; GFX9-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -3264,9 +3264,9 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1064-LABEL: xor_i32_varying:
; GFX1064: ; %bb.0: ; %entry
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v2, v0
; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: s_mov_b64 exec, s[2:3]
; GFX1064-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -3323,10 +3323,10 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1032-LABEL: xor_i32_varying:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: ; implicit-def: $vcc_hi
; GFX1032-NEXT: v_mov_b32_e32 v2, v0
; GFX1032-NEXT: s_or_saveexec_b32 s2, -1
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: s_mov_b32 exec_lo, s2
; GFX1032-NEXT: v_cmp_ne_u32_e64 s2, 1, 0
@ -4265,9 +4265,9 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
;
; GFX8-LABEL: umax_i32_varying:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v2, v0
; GFX8-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: s_mov_b64 exec, s[2:3]
; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -4318,9 +4318,9 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
;
; GFX9-LABEL: umax_i32_varying:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b64 exec, s[2:3]
; GFX9-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -4370,9 +4370,9 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1064-LABEL: umax_i32_varying:
; GFX1064: ; %bb.0: ; %entry
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v2, v0
; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: s_mov_b64 exec, s[2:3]
; GFX1064-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, 0
@ -4429,10 +4429,10 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
;
; GFX1032-LABEL: umax_i32_varying:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: ; implicit-def: $vcc_hi
; GFX1032-NEXT: v_mov_b32_e32 v2, v0
; GFX1032-NEXT: s_or_saveexec_b32 s2, -1
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: s_mov_b32 exec_lo, s2
; GFX1032-NEXT: v_cmp_ne_u32_e64 s2, 1, 0

View File

@ -107,3 +107,38 @@ body: |
S_ENDPGM 0
...
---
# Ensure that wwm is not put around an EXEC copy
#CHECK-LABEL: name: copy_exec
#CHECK: %7:sreg_64 = COPY $exec
#CHECK-NEXT: %14:sreg_64 = ENTER_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
#CHECK-NEXT: %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
#CHECK-NEXT: $exec = EXIT_WWM %14
#CHECK-NEXT: %9:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %7.sub0, 0, implicit $exec
name: copy_exec
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
%3:sgpr_32 = COPY $sgpr3
%2:sgpr_32 = COPY $sgpr2
%1:sgpr_32 = COPY $sgpr1
%0:sgpr_32 = COPY $sgpr0
%4:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
%5:sreg_32 = S_MOV_B32 0
%6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %4, %5, 0, 0, 0, 0, 0, 0, implicit $exec
%8:sreg_64 = COPY $exec
%9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%10:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %8.sub0:sreg_64, 0, implicit $exec
%11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, %10:vgpr_32, 312, 15, 15, 0, implicit $exec
%12:sreg_32 = V_READLANE_B32 %11:vgpr_32, 63
early-clobber %13:sreg_32 = WWM %9:vgpr_32, implicit $exec
%14:vgpr_32 = COPY %13
BUFFER_STORE_DWORD_OFFSET_exact killed %14, %4, %5, 4, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...