[AMDGPU] Fix 224-bit spills

Related to D104622.

Differential Revision: https://reviews.llvm.org/D105109
This commit is contained in:
Piotr Sobczak 2021-06-29 12:35:34 +02:00
parent aaf6a7ac34
commit f38a8b54ea
3 changed files with 123 additions and 0 deletions

View File

@ -1394,6 +1394,8 @@ static unsigned getAGPRSpillSaveOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_A160_SAVE; return AMDGPU::SI_SPILL_A160_SAVE;
case 24: case 24:
return AMDGPU::SI_SPILL_A192_SAVE; return AMDGPU::SI_SPILL_A192_SAVE;
case 28:
return AMDGPU::SI_SPILL_A224_SAVE;
case 32: case 32:
return AMDGPU::SI_SPILL_A256_SAVE; return AMDGPU::SI_SPILL_A256_SAVE;
case 64: case 64:
@ -1531,6 +1533,8 @@ static unsigned getAGPRSpillRestoreOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_A160_RESTORE; return AMDGPU::SI_SPILL_A160_RESTORE;
case 24: case 24:
return AMDGPU::SI_SPILL_A192_RESTORE; return AMDGPU::SI_SPILL_A192_RESTORE;
case 28:
return AMDGPU::SI_SPILL_A224_RESTORE;
case 32: case 32:
return AMDGPU::SI_SPILL_A256_RESTORE; return AMDGPU::SI_SPILL_A256_RESTORE;
case 64: case 64:

View File

@ -814,6 +814,13 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_A256_SAVE: case AMDGPU::SI_SPILL_A256_SAVE:
case AMDGPU::SI_SPILL_A256_RESTORE: case AMDGPU::SI_SPILL_A256_RESTORE:
return 8; return 8;
case AMDGPU::SI_SPILL_S224_SAVE:
case AMDGPU::SI_SPILL_S224_RESTORE:
case AMDGPU::SI_SPILL_V224_SAVE:
case AMDGPU::SI_SPILL_V224_RESTORE:
case AMDGPU::SI_SPILL_A224_SAVE:
case AMDGPU::SI_SPILL_A224_RESTORE:
return 7;
case AMDGPU::SI_SPILL_S192_SAVE: case AMDGPU::SI_SPILL_S192_SAVE:
case AMDGPU::SI_SPILL_S192_RESTORE: case AMDGPU::SI_SPILL_S192_RESTORE:
case AMDGPU::SI_SPILL_V192_SAVE: case AMDGPU::SI_SPILL_V192_SAVE:
@ -1473,6 +1480,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
case AMDGPU::SI_SPILL_S1024_SAVE: case AMDGPU::SI_SPILL_S1024_SAVE:
case AMDGPU::SI_SPILL_S512_SAVE: case AMDGPU::SI_SPILL_S512_SAVE:
case AMDGPU::SI_SPILL_S256_SAVE: case AMDGPU::SI_SPILL_S256_SAVE:
case AMDGPU::SI_SPILL_S224_SAVE:
case AMDGPU::SI_SPILL_S192_SAVE: case AMDGPU::SI_SPILL_S192_SAVE:
case AMDGPU::SI_SPILL_S160_SAVE: case AMDGPU::SI_SPILL_S160_SAVE:
case AMDGPU::SI_SPILL_S128_SAVE: case AMDGPU::SI_SPILL_S128_SAVE:
@ -1483,6 +1491,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
case AMDGPU::SI_SPILL_S1024_RESTORE: case AMDGPU::SI_SPILL_S1024_RESTORE:
case AMDGPU::SI_SPILL_S512_RESTORE: case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_S256_RESTORE: case AMDGPU::SI_SPILL_S256_RESTORE:
case AMDGPU::SI_SPILL_S224_RESTORE:
case AMDGPU::SI_SPILL_S192_RESTORE: case AMDGPU::SI_SPILL_S192_RESTORE:
case AMDGPU::SI_SPILL_S160_RESTORE: case AMDGPU::SI_SPILL_S160_RESTORE:
case AMDGPU::SI_SPILL_S128_RESTORE: case AMDGPU::SI_SPILL_S128_RESTORE:
@ -1519,6 +1528,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_S1024_SAVE: case AMDGPU::SI_SPILL_S1024_SAVE:
case AMDGPU::SI_SPILL_S512_SAVE: case AMDGPU::SI_SPILL_S512_SAVE:
case AMDGPU::SI_SPILL_S256_SAVE: case AMDGPU::SI_SPILL_S256_SAVE:
case AMDGPU::SI_SPILL_S224_SAVE:
case AMDGPU::SI_SPILL_S192_SAVE: case AMDGPU::SI_SPILL_S192_SAVE:
case AMDGPU::SI_SPILL_S160_SAVE: case AMDGPU::SI_SPILL_S160_SAVE:
case AMDGPU::SI_SPILL_S128_SAVE: case AMDGPU::SI_SPILL_S128_SAVE:
@ -1533,6 +1543,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_S1024_RESTORE: case AMDGPU::SI_SPILL_S1024_RESTORE:
case AMDGPU::SI_SPILL_S512_RESTORE: case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_S256_RESTORE: case AMDGPU::SI_SPILL_S256_RESTORE:
case AMDGPU::SI_SPILL_S224_RESTORE:
case AMDGPU::SI_SPILL_S192_RESTORE: case AMDGPU::SI_SPILL_S192_RESTORE:
case AMDGPU::SI_SPILL_S160_RESTORE: case AMDGPU::SI_SPILL_S160_RESTORE:
case AMDGPU::SI_SPILL_S128_RESTORE: case AMDGPU::SI_SPILL_S128_RESTORE:
@ -1547,6 +1558,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_V1024_SAVE: case AMDGPU::SI_SPILL_V1024_SAVE:
case AMDGPU::SI_SPILL_V512_SAVE: case AMDGPU::SI_SPILL_V512_SAVE:
case AMDGPU::SI_SPILL_V256_SAVE: case AMDGPU::SI_SPILL_V256_SAVE:
case AMDGPU::SI_SPILL_V224_SAVE:
case AMDGPU::SI_SPILL_V192_SAVE: case AMDGPU::SI_SPILL_V192_SAVE:
case AMDGPU::SI_SPILL_V160_SAVE: case AMDGPU::SI_SPILL_V160_SAVE:
case AMDGPU::SI_SPILL_V128_SAVE: case AMDGPU::SI_SPILL_V128_SAVE:
@ -1556,6 +1568,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_A1024_SAVE: case AMDGPU::SI_SPILL_A1024_SAVE:
case AMDGPU::SI_SPILL_A512_SAVE: case AMDGPU::SI_SPILL_A512_SAVE:
case AMDGPU::SI_SPILL_A256_SAVE: case AMDGPU::SI_SPILL_A256_SAVE:
case AMDGPU::SI_SPILL_A224_SAVE:
case AMDGPU::SI_SPILL_A192_SAVE: case AMDGPU::SI_SPILL_A192_SAVE:
case AMDGPU::SI_SPILL_A160_SAVE: case AMDGPU::SI_SPILL_A160_SAVE:
case AMDGPU::SI_SPILL_A128_SAVE: case AMDGPU::SI_SPILL_A128_SAVE:
@ -1584,6 +1597,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_V128_RESTORE: case AMDGPU::SI_SPILL_V128_RESTORE:
case AMDGPU::SI_SPILL_V160_RESTORE: case AMDGPU::SI_SPILL_V160_RESTORE:
case AMDGPU::SI_SPILL_V192_RESTORE: case AMDGPU::SI_SPILL_V192_RESTORE:
case AMDGPU::SI_SPILL_V224_RESTORE:
case AMDGPU::SI_SPILL_V256_RESTORE: case AMDGPU::SI_SPILL_V256_RESTORE:
case AMDGPU::SI_SPILL_V512_RESTORE: case AMDGPU::SI_SPILL_V512_RESTORE:
case AMDGPU::SI_SPILL_V1024_RESTORE: case AMDGPU::SI_SPILL_V1024_RESTORE:
@ -1593,6 +1607,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_A128_RESTORE: case AMDGPU::SI_SPILL_A128_RESTORE:
case AMDGPU::SI_SPILL_A160_RESTORE: case AMDGPU::SI_SPILL_A160_RESTORE:
case AMDGPU::SI_SPILL_A192_RESTORE: case AMDGPU::SI_SPILL_A192_RESTORE:
case AMDGPU::SI_SPILL_A224_RESTORE:
case AMDGPU::SI_SPILL_A256_RESTORE: case AMDGPU::SI_SPILL_A256_RESTORE:
case AMDGPU::SI_SPILL_A512_RESTORE: case AMDGPU::SI_SPILL_A512_RESTORE:
case AMDGPU::SI_SPILL_A1024_RESTORE: { case AMDGPU::SI_SPILL_A1024_RESTORE: {

View File

@ -0,0 +1,104 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast,si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=EXPANDED %s
# Make sure spill/restore of 224-bit registers works.
# spill_restore_sgpr224: a 224-bit SGPR tuple (%0:sgpr_224) is defined in bb.0
# and consumed in bb.2.
#  * SPILLED checks (regallocfast only): the value is stored to %stack.0 with
#    SI_SPILL_S224_SAVE in bb.0 and reloaded with SI_SPILL_S224_RESTORE in
#    bb.2; both memory operands are 28 bytes.
#  * EXPANDED checks (regallocfast,si-lower-sgpr-spills): the spill pseudos are
#    replaced by seven V_WRITELANE_B32 / seven V_READLANE_B32 instructions that
#    use lanes 0-6 of $vgpr0, one lane per 32-bit sub-register.
---
name: spill_restore_sgpr224
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
stackPtrOffsetReg: $sgpr32
body: |
; SPILLED-LABEL: name: spill_restore_sgpr224
; SPILLED: bb.0:
; SPILLED: successors: %bb.1(0x80000000)
; SPILLED: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; SPILLED: SI_SPILL_S224_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, %stack.0, implicit $exec, implicit $sgpr32 :: (store 28 into %stack.0, align 4, addrspace 5)
; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; SPILLED: bb.1:
; SPILLED: successors: %bb.2(0x80000000)
; SPILLED: S_NOP 1
; SPILLED: bb.2:
; SPILLED: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 = SI_SPILL_S224_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load 28 from %stack.0, align 4, addrspace 5)
; SPILLED: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED-LABEL: name: spill_restore_sgpr224
; EXPANDED: bb.0:
; EXPANDED: successors: %bb.1(0x80000000)
; EXPANDED: liveins: $vgpr0
; EXPANDED: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr9, 5, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED: $vgpr0 = V_WRITELANE_B32 killed $sgpr10, 6, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; EXPANDED: bb.1:
; EXPANDED: successors: %bb.2(0x80000000)
; EXPANDED: liveins: $vgpr0
; EXPANDED: S_NOP 1
; EXPANDED: bb.2:
; EXPANDED: liveins: $vgpr0
; EXPANDED: $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED: $sgpr5 = V_READLANE_B32 $vgpr0, 1
; EXPANDED: $sgpr6 = V_READLANE_B32 $vgpr0, 2
; EXPANDED: $sgpr7 = V_READLANE_B32 $vgpr0, 3
; EXPANDED: $sgpr8 = V_READLANE_B32 $vgpr0, 4
; EXPANDED: $sgpr9 = V_READLANE_B32 $vgpr0, 5
; EXPANDED: $sgpr10 = V_READLANE_B32 $vgpr0, 6
; EXPANDED: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
bb.0:
S_NOP 0, implicit-def %0:sgpr_224
S_CBRANCH_SCC1 implicit undef $scc, %bb.1
bb.1:
S_NOP 1
bb.2:
S_NOP 0, implicit %0
...
# spill_restore_vgpr224: a 224-bit VGPR tuple (%0:vreg_224) is defined in bb.0
# and consumed in bb.2.
#  * SPILLED checks (regallocfast only): the value is stored to %stack.0 with
#    SI_SPILL_V224_SAVE in bb.0 and reloaded with SI_SPILL_V224_RESTORE in
#    bb.2; both memory operands are 28 bytes.
#  * EXPANDED checks (regallocfast,si-lower-sgpr-spills): the expectations are
#    the same as for SPILLED -- the SI_SPILL_V224_* pseudos are still present
#    after the additional pass has run.
---
name: spill_restore_vgpr224
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
stackPtrOffsetReg: $sgpr32
body: |
; SPILLED-LABEL: name: spill_restore_vgpr224
; SPILLED: bb.0:
; SPILLED: successors: %bb.1(0x80000000)
; SPILLED: S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; SPILLED: SI_SPILL_V224_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, %stack.0, $sgpr32, 0, implicit $exec :: (store 28 into %stack.0, align 4, addrspace 5)
; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; SPILLED: bb.1:
; SPILLED: successors: %bb.2(0x80000000)
; SPILLED: S_NOP 1
; SPILLED: bb.2:
; SPILLED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_V224_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 28 from %stack.0, align 4, addrspace 5)
; SPILLED: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; EXPANDED-LABEL: name: spill_restore_vgpr224
; EXPANDED: bb.0:
; EXPANDED: successors: %bb.1(0x80000000)
; EXPANDED: S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; EXPANDED: SI_SPILL_V224_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, %stack.0, $sgpr32, 0, implicit $exec :: (store 28 into %stack.0, align 4, addrspace 5)
; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; EXPANDED: bb.1:
; EXPANDED: successors: %bb.2(0x80000000)
; EXPANDED: S_NOP 1
; EXPANDED: bb.2:
; EXPANDED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_V224_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 28 from %stack.0, align 4, addrspace 5)
; EXPANDED: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
bb.0:
S_NOP 0, implicit-def %0:vreg_224
S_CBRANCH_SCC1 implicit undef $scc, %bb.1
bb.1:
S_NOP 1
bb.2:
S_NOP 0, implicit %0
...