[AMDGPU] Allow folding of sgpr to vgpr copy
Folding an sgpr-to-sgpr copy should potentially also be possible. That is, however, trickier because we may end up with a wrong register class at the use due to xm0/xexec permutations.

Differential Revision: https://reviews.llvm.org/D69280
commit 61e7a61bdc
parent e3d26b42b9
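In short, si-fold-operands can now fold an SGPR (sub-)register directly into the source of a VGPR copy, and dead-mi-elimination then deletes the intermediate SGPR copy. The sketch below merely restates the input and the expected GCN checks of the new fold_sgpr_to_vgpr_copy test added by this patch (virtual registers are renumbered in the expected output):

Input MIR:
    %0:sreg_64 = IMPLICIT_DEF
    %4:vgpr_32 = IMPLICIT_DEF
    %3:vgpr_32 = IMPLICIT_DEF
    %1:sgpr_32 = COPY %0.sub0
    %2:vgpr_32 = COPY %1
    DS_WRITE2_B32_gfx9 %2, killed %4, killed %3, 0, 1, 0, implicit $exec

Expected after si-fold-operands,dead-mi-elimination:
    %0:sreg_64 = IMPLICIT_DEF
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = IMPLICIT_DEF
    %4:vgpr_32 = COPY %0.sub0
    DS_WRITE2_B32_gfx9 %4, killed %1, killed %2, 0, 1, 0, implicit $exec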
@@ -627,10 +627,11 @@ void SIFoldOperands::foldOperand(
     CopiesToReplace.push_back(UseMI);
   } else {
     if (UseMI->isCopy() && OpToFold.isReg() &&
-        Register::isVirtualRegister(UseMI->getOperand(0).getReg()) &&
+        UseMI->getOperand(0).getReg().isVirtual() &&
         TRI->isVectorRegister(*MRI, UseMI->getOperand(0).getReg()) &&
-        TRI->isVectorRegister(*MRI, UseMI->getOperand(1).getReg()) &&
         !UseMI->getOperand(1).getSubReg()) {
+      LLVM_DEBUG(dbgs() << "Folding " << OpToFold
+                        << "\n into " << *UseMI << '\n');
       unsigned Size = TII->getOpSize(*UseMI, 1);
       UseMI->getOperand(1).setReg(OpToFold.getReg());
       UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
@@ -1,13 +1,13 @@
 # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-fold-operands,dead-mi-elimination -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
 
-# GCN-LABEL: name: fold_vgpr_copy
+# GCN-LABEL: name: fold_vgpr_to_vgpr_copy
 # GCN: %0:vreg_64 = IMPLICIT_DEF
 # GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
 # GCN-NEXT: %2:vgpr_32 = IMPLICIT_DEF
 # GCN-NEXT: DS_WRITE2_B32_gfx9 %0.sub0, killed %1, killed %2, 0, 1, 0, implicit $exec
 
 ---
-name: fold_vgpr_copy
+name: fold_vgpr_to_vgpr_copy
 body: |
   bb.0:
 
@@ -17,14 +17,29 @@ body: |
     %1:vgpr_32 = COPY %0.sub0
     %2:vgpr_32 = COPY %1
     DS_WRITE2_B32_gfx9 %2, killed %4, killed %3, 0, 1, 0, implicit $exec
 ...
 
+# GCN-LABEL: name: fold_sgpr_to_vgpr_copy
+# GCN: %0:sreg_64 = IMPLICIT_DEF
+# GCN-NEXT: %1:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %2:vgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: %4:vgpr_32 = COPY %0.sub0
+# GCN-NEXT: DS_WRITE2_B32_gfx9 %4, killed %1, killed %2, 0, 1, 0, implicit $exec
+name: fold_sgpr_to_vgpr_copy
+body: |
+  bb.0:
+
+    %0:sreg_64 = IMPLICIT_DEF
+    %4:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = IMPLICIT_DEF
+    %1:sgpr_32 = COPY %0.sub0
+    %2:vgpr_32 = COPY %1
+    DS_WRITE2_B32_gfx9 %2, killed %4, killed %3, 0, 1, 0, implicit $exec
+...
+
 # GCN-LABEL: name: fma_sgpr_use
 # GCN: %0:sreg_64_xexec = IMPLICIT_DEF
-# GCN-NEXT: %2:sgpr_32 = COPY %0.sub1
-# GCN-NEXT: %4:vgpr_32 = nnan ninf nsz arcp contract afn reassoc V_FMA_F32 2, %0.sub0, 0, 1073741824, 0, %2, 0, 0, implicit $exec
+# GCN-NEXT: %4:vgpr_32 = nnan ninf nsz arcp contract afn reassoc V_FMA_F32 2, %0.sub0, 0, 1073741824, 0, %0.sub1, 0, 0, implicit $exec
 ---
 name: fma_sgpr_use
 body: |
@@ -40,15 +40,15 @@ define amdgpu_kernel void @saddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b
 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
 ; VI-NEXT: v_mov_b32_e32 v1, s6
-; VI-NEXT: s_add_u32 s8, s6, s0
-; VI-NEXT: s_addc_u32 s9, s7, s1
+; VI-NEXT: s_add_u32 s2, s6, s0
+; VI-NEXT: s_addc_u32 s3, s7, s1
 ; VI-NEXT: v_mov_b32_e32 v2, s7
-; VI-NEXT: v_cmp_lt_i64_e32 vcc, s[8:9], v[1:2]
-; VI-NEXT: v_cmp_lt_i64_e64 s[2:3], s[0:1], 0
-; VI-NEXT: v_mov_b32_e32 v3, s9
-; VI-NEXT: s_xor_b64 s[0:1], s[2:3], vcc
+; VI-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[1:2]
+; VI-NEXT: v_cmp_lt_i64_e64 s[8:9], s[0:1], 0
+; VI-NEXT: v_mov_b32_e32 v3, s3
+; VI-NEXT: s_xor_b64 s[0:1], s[8:9], vcc
 ; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
-; VI-NEXT: v_add_u32_e32 v2, vcc, s8, v2
+; VI-NEXT: v_add_u32_e32 v2, vcc, s2, v2
 ; VI-NEXT: v_mov_b32_e32 v0, s4
 ; VI-NEXT: v_mov_b32_e32 v1, s5
 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
@@ -61,15 +61,15 @@ define amdgpu_kernel void @saddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b
 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT: v_mov_b32_e32 v1, s6
-; GFX9-NEXT: s_add_u32 s8, s6, s0
-; GFX9-NEXT: s_addc_u32 s9, s7, s1
+; GFX9-NEXT: s_add_u32 s2, s6, s0
+; GFX9-NEXT: s_addc_u32 s3, s7, s1
 ; GFX9-NEXT: v_mov_b32_e32 v2, s7
-; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[8:9], v[1:2]
-; GFX9-NEXT: v_cmp_lt_i64_e64 s[2:3], s[0:1], 0
-; GFX9-NEXT: v_mov_b32_e32 v3, s9
-; GFX9-NEXT: s_xor_b64 s[0:1], s[2:3], vcc
+; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[1:2]
+; GFX9-NEXT: v_cmp_lt_i64_e64 s[8:9], s[0:1], 0
+; GFX9-NEXT: v_mov_b32_e32 v3, s3
+; GFX9-NEXT: s_xor_b64 s[0:1], s[8:9], vcc
 ; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s8, v2
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s2, v2
 ; GFX9-NEXT: v_mov_b32_e32 v0, s4
 ; GFX9-NEXT: v_mov_b32_e32 v1, s5
 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc