forked from OSchip/llvm-project
[AMDGPU] Folding of FI operand with flat scratch
Differential Revision: https://reviews.llvm.org/D93501
This commit is contained in:
parent
a5311d731e
commit
ae8f4b2178
|
@@ -172,9 +172,23 @@ static bool frameIndexMayFold(const SIInstrInfo *TII,
|
|||
const MachineInstr &UseMI,
|
||||
int OpNo,
|
||||
const MachineOperand &OpToFold) {
|
||||
return OpToFold.isFI() &&
|
||||
TII->isMUBUF(UseMI) &&
|
||||
OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::vaddr);
|
||||
if (!OpToFold.isFI())
|
||||
return false;
|
||||
|
||||
if (TII->isMUBUF(UseMI))
|
||||
return OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
|
||||
AMDGPU::OpName::vaddr);
|
||||
if (!TII->isFLATScratch(UseMI))
|
||||
return false;
|
||||
|
||||
int SIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
|
||||
AMDGPU::OpName::saddr);
|
||||
if (OpNo == SIdx)
|
||||
return true;
|
||||
|
||||
int VIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
|
||||
AMDGPU::OpName::vaddr);
|
||||
return OpNo == VIdx && SIdx == -1;
|
||||
}
|
||||
|
||||
FunctionPass *llvm::createSIFoldOperandsPass() {
|
||||
|
@@ -631,25 +645,36 @@ void SIFoldOperands::foldOperand(
|
|||
// Sanity check that this is a stack access.
|
||||
// FIXME: Should probably use stack pseudos before frame lowering.
|
||||
|
||||
if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
|
||||
MFI->getScratchRSrcReg())
|
||||
return;
|
||||
if (TII->isMUBUF(*UseMI)) {
|
||||
if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
|
||||
MFI->getScratchRSrcReg())
|
||||
return;
|
||||
|
||||
// Ensure this is either relative to the current frame or the current wave.
|
||||
MachineOperand &SOff =
|
||||
*TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);
|
||||
if ((!SOff.isReg() || SOff.getReg() != MFI->getStackPtrOffsetReg()) &&
|
||||
(!SOff.isImm() || SOff.getImm() != 0))
|
||||
return;
|
||||
// Ensure this is either relative to the current frame or the current
|
||||
// wave.
|
||||
MachineOperand &SOff =
|
||||
*TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);
|
||||
if ((!SOff.isReg() || SOff.getReg() != MFI->getStackPtrOffsetReg()) &&
|
||||
(!SOff.isImm() || SOff.getImm() != 0))
|
||||
return;
|
||||
|
||||
// If this is relative to the current wave, update it to be relative to
|
||||
// the current frame.
|
||||
if (SOff.isImm())
|
||||
SOff.ChangeToRegister(MFI->getStackPtrOffsetReg(), false);
|
||||
}
|
||||
|
||||
// A frame index will resolve to a positive constant, so it should always be
|
||||
// safe to fold the addressing mode, even pre-GFX9.
|
||||
UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex());
|
||||
|
||||
// If this is relative to the current wave, update it to be relative to the
|
||||
// current frame.
|
||||
if (SOff.isImm())
|
||||
SOff.ChangeToRegister(MFI->getStackPtrOffsetReg(), false);
|
||||
if (TII->isFLATScratch(*UseMI) &&
|
||||
AMDGPU::getNamedOperandIdx(UseMI->getOpcode(),
|
||||
AMDGPU::OpName::vaddr) != -1) {
|
||||
unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(UseMI->getOpcode());
|
||||
UseMI->setDesc(TII->get(NewOpc));
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@@ -1184,6 +1184,9 @@ namespace AMDGPU {
|
|||
LLVM_READONLY
|
||||
int getFlatScratchInstSTfromSS(uint16_t Opcode);
|
||||
|
||||
LLVM_READONLY
|
||||
int getFlatScratchInstSSfromSV(uint16_t Opcode);
|
||||
|
||||
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
|
||||
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
|
||||
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
|
||||
|
|
|
@@ -2524,6 +2524,13 @@ def getFlatScratchInstSTfromSS : InstrMapping {
|
|||
let ValueCols = [["ST"]];
|
||||
}
|
||||
|
||||
def getFlatScratchInstSSfromSV : InstrMapping {
|
||||
let FilterClass = "FlatScratchInst";
|
||||
let RowFields = ["SVOp"];
|
||||
let ColFields = ["Mode"];
|
||||
let KeyCol = ["SV"];
|
||||
let ValueCols = [["SS"]];
|
||||
}
|
||||
|
||||
include "SIInstructions.td"
|
||||
|
||||
|
|
|
@@ -1498,6 +1498,10 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
|||
int64_t Offset = FrameInfo.getObjectOffset(Index);
|
||||
if (ST.enableFlatScratch()) {
|
||||
if (TII->isFLATScratch(*MI)) {
|
||||
assert((int16_t)FIOperandNum ==
|
||||
AMDGPU::getNamedOperandIdx(MI->getOpcode(),
|
||||
AMDGPU::OpName::saddr));
|
||||
|
||||
// The offset is always swizzled, just replace it
|
||||
if (FrameReg)
|
||||
FIOp.ChangeToRegister(FrameReg, false);
|
||||
|
|
|
@@ -0,0 +1,88 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-enable-flat-scratch -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
---
|
||||
name: test_fold_fi_scratch_load_vgpr
|
||||
stack:
|
||||
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
; GCN-LABEL: name: test_fold_fi_scratch_load_vgpr
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
|
||||
; GCN: S_ENDPGM 0
|
||||
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
%1:vgpr_32 = SCRATCH_LOAD_DWORD %0:vgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fold_fi_scratch_load_sgpr
|
||||
stack:
|
||||
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
; GCN-LABEL: name: test_fold_fi_scratch_load_sgpr
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 %stack.0
|
||||
; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
|
||||
; GCN: S_ENDPGM 0
|
||||
%0:sgpr_32 = S_MOV_B32 %stack.0
|
||||
%1:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %0:sgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fold_fi_scratch_store_vgpr
|
||||
stack:
|
||||
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
; GCN-LABEL: name: test_fold_fi_scratch_store_vgpr
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], %stack.0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
|
||||
; GCN: S_ENDPGM 0
|
||||
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
SCRATCH_STORE_DWORD %1:vgpr_32, %0:vgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: test_no_fold_fi_scratch_store_vgpr
|
||||
stack:
|
||||
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
; GCN-LABEL: name: test_no_fold_fi_scratch_store_vgpr
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: SCRATCH_STORE_DWORD [[V_MOV_B32_e32_]], [[DEF]], 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
|
||||
; GCN: S_ENDPGM 0
|
||||
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
SCRATCH_STORE_DWORD %0:vgpr_32, %1:vgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: test_fold_fi_scratch_store_sgpr
|
||||
stack:
|
||||
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
; GCN-LABEL: name: test_fold_fi_scratch_store_sgpr
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 %stack.0
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], %stack.0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
|
||||
; GCN: S_ENDPGM 0
|
||||
%0:sgpr_32 = S_MOV_B32 %stack.0
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
SCRATCH_STORE_DWORD_SADDR %1:vgpr_32, %0:sgpr_32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
|
@@ -166,7 +166,7 @@ define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 } addrspace(5)* b
|
|||
|
||||
; GFX9: v_add_u32_e32 [[GEP:v[0-9]+]], 4, [[SP]]
|
||||
; GFX9-MUBUF: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4{{$}}
|
||||
; GFX9-FLATSCR: scratch_load_dword v{{[0-9]+}}, [[SP]], off offset:4{{$}}
|
||||
; GFX9-FLATSCR: scratch_load_dword v{{[0-9]+}}, off, s32 offset:4{{$}}
|
||||
|
||||
; GCN: ds_write_b32 v{{[0-9]+}}, [[GEP]]
|
||||
define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0, i32 %arg2) #0 {
|
||||
|
|
Loading…
Reference in New Issue