[AMDGPU][GlobalISel] Support register offsets for SMRDs.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D128836
This commit is contained in:
Ivan Kosarev 2022-07-05 13:39:46 +01:00
parent 9fbf1107cc
commit 8cd79bc12c
3 changed files with 53 additions and 18 deletions

View File

@ -3235,7 +3235,7 @@ static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) {
// Match legalized form %zext = G_MERGE_VALUES (s32 %x), (s32 0)
const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
return false;
return Register();
if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt())) {
return Def->getOperand(1).getReg();
@ -3851,27 +3851,36 @@ AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
getAddrModeInfo(*MI, *MRI, AddrInfo);
// FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
// then we can select all ptr + 32-bit offsets not just immediate offsets.
if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
// then we can select all ptr + 32-bit offsets.
if (AddrInfo.empty())
return None;
const GEPInfo &GEPInfo = AddrInfo[0];
// SGPR offset is unsigned.
if (!GEPInfo.Imm || GEPInfo.Imm < 0 || !isUInt<32>(GEPInfo.Imm))
return None;
// If we make it this far we have a load with an 32-bit immediate offset.
// It is OK to select this using a sgpr offset, because we have already
// failed trying to select this load into one of the _IMM variants since
// the _IMM Patterns are considered before the _SGPR patterns.
Register PtrReg = GEPInfo.SgprParts[0];
Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
.addImm(GEPInfo.Imm);
return {{
[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
[=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
}};
// SGPR offset is unsigned.
if (AddrInfo[0].SgprParts.size() == 1 && isUInt<32>(GEPInfo.Imm) &&
GEPInfo.Imm != 0) {
// If we make it this far we have a load with an 32-bit immediate offset.
// It is OK to select this using a sgpr offset, because we have already
// failed trying to select this load into one of the _IMM variants since
// the _IMM Patterns are considered before the _SGPR patterns.
Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
.addImm(GEPInfo.Imm);
return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
[=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}};
}
if (AddrInfo[0].SgprParts.size() == 2 && GEPInfo.Imm == 0) {
if (Register OffsetReg =
matchZeroExtendFromS32(*MRI, GEPInfo.SgprParts[1])) {
return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
[=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}};
}
}
return None;
}
std::pair<Register, int>

View File

@ -6,6 +6,7 @@
define amdgpu_kernel void @smrd_imm(i32 addrspace(4)* %const0) { ret void }
define amdgpu_kernel void @smrd_wide() { ret void }
define amdgpu_kernel void @constant_address_positive() { ret void }
define amdgpu_kernel void @smrd_sgpr() { ret void }
...
---
@ -210,3 +211,24 @@ body: |
%3:sgpr(s32) = G_LOAD %2 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
S_ENDPGM 0, implicit %3
...
---
# Test a load with a register offset.
# GCN-LABEL: name: smrd_sgpr{{$}}
# GCN: S_LOAD_DWORD_SGPR %0, %1, 0
name: smrd_sgpr
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $sgpr0_sgpr1, $sgpr2
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s32) = COPY $sgpr2
%2:sgpr(s64) = G_ZEXT %1:sgpr(s32)
%4:sgpr(p4) = G_PTR_ADD %0, %2
%5:sgpr(s32) = G_LOAD %4 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
S_ENDPGM 0, implicit %5
...

View File

@ -2,10 +2,14 @@
; from a register.
; RUN: llc -march=amdgcn -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -global-isel -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GISEL %s
; GCN: %[[OFFSET:[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer
; GCN: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0 :: (invariant load (s128) from %ir.13, addrspace 4)
; GISEL: $[[OFFSET:.*]] = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer
; GISEL: S_LOAD_DWORDX4_SGPR killed renamable {{.*}}, killed renamable $[[OFFSET]], 0 :: (invariant load (<4 x s32>) from {{.*}}, addrspace 4)
define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %resNode0, i32 inreg %resNode1, <3 x i32> inreg %2, i32 inreg %3, <3 x i32> %4) local_unnamed_addr #2 {
.entry:
%5 = call i64 @llvm.amdgcn.s.getpc() #3