[AMDGPU] Folding of FI operand with flat scratch

Differential Revision: https://reviews.llvm.org/D93501
This commit is contained in:
Stanislav Mekhanoshin 2020-12-17 16:48:04 -08:00
parent a5311d731e
commit ae8f4b2178
6 changed files with 144 additions and 17 deletions

View File

@ -172,9 +172,23 @@ static bool frameIndexMayFold(const SIInstrInfo *TII,
const MachineInstr &UseMI,
int OpNo,
const MachineOperand &OpToFold) {
return OpToFold.isFI() &&
TII->isMUBUF(UseMI) &&
OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::vaddr);
if (!OpToFold.isFI())
return false;
if (TII->isMUBUF(UseMI))
return OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
AMDGPU::OpName::vaddr);
if (!TII->isFLATScratch(UseMI))
return false;
int SIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
AMDGPU::OpName::saddr);
if (OpNo == SIdx)
return true;
int VIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
AMDGPU::OpName::vaddr);
return OpNo == VIdx && SIdx == -1;
}
FunctionPass *llvm::createSIFoldOperandsPass() {
@ -631,25 +645,36 @@ void SIFoldOperands::foldOperand(
// Sanity check that this is a stack access.
// FIXME: Should probably use stack pseudos before frame lowering.
if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
MFI->getScratchRSrcReg())
return;
if (TII->isMUBUF(*UseMI)) {
if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
MFI->getScratchRSrcReg())
return;
// Ensure this is either relative to the current frame or the current wave.
MachineOperand &SOff =
*TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);
if ((!SOff.isReg() || SOff.getReg() != MFI->getStackPtrOffsetReg()) &&
(!SOff.isImm() || SOff.getImm() != 0))
return;
// Ensure this is either relative to the current frame or the current
// wave.
MachineOperand &SOff =
*TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);
if ((!SOff.isReg() || SOff.getReg() != MFI->getStackPtrOffsetReg()) &&
(!SOff.isImm() || SOff.getImm() != 0))
return;
// If this is relative to the current wave, update it to be relative to
// the current frame.
if (SOff.isImm())
SOff.ChangeToRegister(MFI->getStackPtrOffsetReg(), false);
}
// A frame index will resolve to a positive constant, so it should always be
// safe to fold the addressing mode, even pre-GFX9.
UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex());
// If this is relative to the current wave, update it to be relative to the
// current frame.
if (SOff.isImm())
SOff.ChangeToRegister(MFI->getStackPtrOffsetReg(), false);
if (TII->isFLATScratch(*UseMI) &&
AMDGPU::getNamedOperandIdx(UseMI->getOpcode(),
AMDGPU::OpName::vaddr) != -1) {
unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(UseMI->getOpcode());
UseMI->setDesc(TII->get(NewOpc));
}
return;
}

View File

@ -1184,6 +1184,9 @@ namespace AMDGPU {
LLVM_READONLY
int getFlatScratchInstSTfromSS(uint16_t Opcode);
LLVM_READONLY
int getFlatScratchInstSSfromSV(uint16_t Opcode);
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);

View File

@ -2524,6 +2524,13 @@ def getFlatScratchInstSTfromSS : InstrMapping {
let ValueCols = [["ST"]];
}
// Instruction mapping from the "SV" form of a flat scratch instruction to its
// "SS" form. Only records of class FlatScratchInst take part; records sharing
// the same SVOp value form one row, and the Mode field selects the column
// within that row (key column "SV", value column "SS").
def getFlatScratchInstSSfromSV : InstrMapping {
let FilterClass = "FlatScratchInst";
let RowFields = ["SVOp"];
let ColFields = ["Mode"];
let KeyCol = ["SV"];
let ValueCols = [["SS"]];
}
include "SIInstructions.td"

View File

@ -1498,6 +1498,10 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
int64_t Offset = FrameInfo.getObjectOffset(Index);
if (ST.enableFlatScratch()) {
if (TII->isFLATScratch(*MI)) {
assert((int16_t)FIOperandNum ==
AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::saddr));
// The offset is always swizzled, just replace it
if (FrameReg)
FIOp.ChangeToRegister(FrameReg, false);

View File

@ -0,0 +1,88 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-enable-flat-scratch -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
# The frame index is copied into a VGPR with V_MOV_B32 and that VGPR is the
# address operand of SCRATCH_LOAD_DWORD. The expected output replaces the
# register with %stack.0 and switches the load to SCRATCH_LOAD_DWORD_SADDR.
---
name: test_fold_fi_scratch_load_vgpr
stack:
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
body: |
bb.0.entry:
; GCN-LABEL: name: test_fold_fi_scratch_load_vgpr
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
; GCN: S_ENDPGM 0
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
%1:vgpr_32 = SCRATCH_LOAD_DWORD %0:vgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
S_ENDPGM 0
...
# The frame index is copied into an SGPR with S_MOV_B32 and that SGPR is the
# address operand of SCRATCH_LOAD_DWORD_SADDR. The expected output replaces
# the register with %stack.0 and keeps the same opcode.
---
name: test_fold_fi_scratch_load_sgpr
stack:
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
body: |
bb.0.entry:
; GCN-LABEL: name: test_fold_fi_scratch_load_sgpr
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 %stack.0
; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
; GCN: S_ENDPGM 0
%0:sgpr_32 = S_MOV_B32 %stack.0
%1:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %0:sgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
S_ENDPGM 0
...
# Store counterpart of the VGPR load case: the V_MOV_B32 result holding the
# frame index is the second operand of SCRATCH_STORE_DWORD. The expected output
# puts %stack.0 in that position and switches to SCRATCH_STORE_DWORD_SADDR.
---
name: test_fold_fi_scratch_store_vgpr
stack:
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
body: |
bb.0.entry:
; GCN-LABEL: name: test_fold_fi_scratch_store_vgpr
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], %stack.0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
; GCN: S_ENDPGM 0
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
%1:vgpr_32 = IMPLICIT_DEF
SCRATCH_STORE_DWORD %1:vgpr_32, %0:vgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
S_ENDPGM 0
...
# Negative case: the V_MOV_B32 result holding the frame index is the FIRST
# operand of SCRATCH_STORE_DWORD (presumably the stored value rather than the
# address; confirm against the instruction definition). The expected output
# leaves the store unchanged, i.e. no frame index is folded into it.
---
name: test_no_fold_fi_scratch_store_vgpr
stack:
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
body: |
bb.0.entry:
; GCN-LABEL: name: test_no_fold_fi_scratch_store_vgpr
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN: SCRATCH_STORE_DWORD [[V_MOV_B32_e32_]], [[DEF]], 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
; GCN: S_ENDPGM 0
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
%1:vgpr_32 = IMPLICIT_DEF
SCRATCH_STORE_DWORD %0:vgpr_32, %1:vgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
S_ENDPGM 0
...
# Store counterpart of the SGPR load case: the S_MOV_B32 result holding the
# frame index is the second operand of SCRATCH_STORE_DWORD_SADDR. The expected
# output puts %stack.0 in that position and keeps the same opcode.
---
name: test_fold_fi_scratch_store_sgpr
stack:
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
body: |
bb.0.entry:
; GCN-LABEL: name: test_fold_fi_scratch_store_sgpr
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 %stack.0
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], %stack.0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
; GCN: S_ENDPGM 0
%0:sgpr_32 = S_MOV_B32 %stack.0
%1:vgpr_32 = IMPLICIT_DEF
SCRATCH_STORE_DWORD_SADDR %1:vgpr_32, %0:sgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
S_ENDPGM 0
...

View File

@ -166,7 +166,7 @@ define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 } addrspace(5)* b
; GFX9: v_add_u32_e32 [[GEP:v[0-9]+]], 4, [[SP]]
; GFX9-MUBUF: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4{{$}}
; GFX9-FLATSCR: scratch_load_dword v{{[0-9]+}}, [[SP]], off offset:4{{$}}
; GFX9-FLATSCR: scratch_load_dword v{{[0-9]+}}, off, s32 offset:4{{$}}
; GCN: ds_write_b32 v{{[0-9]+}}, [[GEP]]
define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0, i32 %arg2) #0 {