[AMDGPU] Change FLAT Scratch SADDR to VADDR form in moveToVALU

Extend the legalization of global SADDR loads and stores
with changing to VADDR to the FLAT scratch instructions.

Differential Revision: https://reviews.llvm.org/D101408
This commit is contained in:
Stanislav Mekhanoshin 2021-04-30 11:26:53 -07:00
parent d98e5e02ad
commit 4d6ebe8ac0
4 changed files with 35 additions and 13 deletions

View File

@ -5021,6 +5021,8 @@ bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const {
assert(isSegmentSpecificFLAT(Inst));
int NewOpc = AMDGPU::getGlobalVaddrOp(Opc);
if (NewOpc < 0)
NewOpc = AMDGPU::getFlatScratchInstSVfromSS(Opc);
if (NewOpc < 0)
return false;
@ -5034,14 +5036,17 @@ bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const {
return false;
int OldVAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
assert(OldVAddrIdx >= 0);
// Check vaddr, it shall be zero
MachineOperand &VAddr = Inst.getOperand(OldVAddrIdx);
MachineInstr *VAddrDef = MRI.getUniqueVRegDef(VAddr.getReg());
if (!VAddrDef || VAddrDef->getOpcode() != AMDGPU::V_MOV_B32_e32 ||
!VAddrDef->getOperand(1).isImm() || VAddrDef->getOperand(1).getImm() != 0)
return false;
// Check vaddr, it shall be zero or absent.
MachineInstr *VAddrDef = nullptr;
if (OldVAddrIdx >= 0) {
MachineOperand &VAddr = Inst.getOperand(OldVAddrIdx);
VAddrDef = MRI.getUniqueVRegDef(VAddr.getReg());
if (!VAddrDef || VAddrDef->getOpcode() != AMDGPU::V_MOV_B32_e32 ||
!VAddrDef->getOperand(1).isImm() ||
VAddrDef->getOperand(1).getImm() != 0)
return false;
}
const MCInstrDesc &NewDesc = get(NewOpc);
Inst.setDesc(NewDesc);
@ -5060,10 +5065,12 @@ bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const {
MRI.addRegOperandToUseList(&NewVAddr);
} else {
assert(OldSAddrIdx == NewVAddrIdx);
Inst.RemoveOperand(OldVAddrIdx);
if (OldVAddrIdx >= 0)
Inst.RemoveOperand(OldVAddrIdx);
}
if (MRI.use_nodbg_empty(VAddrDef->getOperand(0).getReg()))
if (VAddrDef && MRI.use_nodbg_empty(VAddrDef->getOperand(0).getReg()))
VAddrDef->eraseFromParent();
return true;

View File

@ -1215,12 +1215,21 @@ namespace AMDGPU {
LLVM_READONLY
int getVCMPXNoSDstOp(uint16_t Opcode);
/// \returns ST form with only immediate offset of a FLAT Scratch instruction
/// given an \p Opcode of an SS (SADDR) form.
LLVM_READONLY
int getFlatScratchInstSTfromSS(uint16_t Opcode);
/// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SV (VADDR) form.
LLVM_READONLY
int getFlatScratchInstSSfromSV(uint16_t Opcode);
/// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SS (SADDR) form.
LLVM_READONLY
int getFlatScratchInstSVfromSS(uint16_t Opcode);
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);

View File

@ -2549,6 +2549,14 @@ def getFlatScratchInstSSfromSV : InstrMapping {
let ValueCols = [["SS"]];
}
def getFlatScratchInstSVfromSS : InstrMapping {
let FilterClass = "FlatScratchInst";
let RowFields = ["SVOp"];
let ColFields = ["Mode"];
let KeyCol = ["SS"];
let ValueCols = [["SV"]];
}
include "SIInstructions.td"
include "DSInstructions.td"

View File

@ -350,8 +350,7 @@ body: |
; GCN: bb.1:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %6, %bb.1
; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]], implicit $exec
; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR [[V_READFIRSTLANE_B32_]], 0, 0, implicit $exec, implicit $flat_scr
; GCN: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[PHI]], 0, 0, implicit $exec, implicit $flat_scr
; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec
; GCN: V_CMP_NE_U32_e32 0, [[V_AND_B32_e64_]], implicit-def $vcc, implicit $exec
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec
@ -387,8 +386,7 @@ body: |
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %6, %bb.1
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]], implicit $exec
; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], [[V_READFIRSTLANE_B32_]], 0, 0, implicit $exec, implicit $flat_scr
; GCN: SCRATCH_STORE_DWORD [[DEF]], [[PHI]], 0, 0, implicit $exec, implicit $flat_scr
; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec
; GCN: V_CMP_NE_U32_e32 0, [[V_AND_B32_e64_]], implicit-def $vcc, implicit $exec
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec