forked from OSchip/llvm-project
[AMDGPU][GlobalISel] Support register offsets for SMRDs.
Reviewed By: foad. Differential Revision: https://reviews.llvm.org/D128836
This commit is contained in:
parent
9fbf1107cc
commit
8cd79bc12c
|
@ -3235,7 +3235,7 @@ static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) {
|
|||
// Match legalized form %zext = G_MERGE_VALUES (s32 %x), (s32 0)
|
||||
const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
|
||||
if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
|
||||
return false;
|
||||
return Register();
|
||||
|
||||
if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt())) {
|
||||
return Def->getOperand(1).getReg();
|
||||
|
@ -3851,27 +3851,36 @@ AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
|
|||
getAddrModeInfo(*MI, *MRI, AddrInfo);
|
||||
|
||||
// FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
|
||||
// then we can select all ptr + 32-bit offsets not just immediate offsets.
|
||||
if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
|
||||
// then we can select all ptr + 32-bit offsets.
|
||||
if (AddrInfo.empty())
|
||||
return None;
|
||||
|
||||
const GEPInfo &GEPInfo = AddrInfo[0];
|
||||
// SGPR offset is unsigned.
|
||||
if (!GEPInfo.Imm || GEPInfo.Imm < 0 || !isUInt<32>(GEPInfo.Imm))
|
||||
return None;
|
||||
|
||||
// If we make it this far we have a load with a 32-bit immediate offset.
|
||||
// It is OK to select this using a sgpr offset, because we have already
|
||||
// failed trying to select this load into one of the _IMM variants since
|
||||
// the _IMM Patterns are considered before the _SGPR patterns.
|
||||
Register PtrReg = GEPInfo.SgprParts[0];
|
||||
Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
|
||||
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
|
||||
.addImm(GEPInfo.Imm);
|
||||
return {{
|
||||
[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
|
||||
[=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
|
||||
}};
|
||||
|
||||
// SGPR offset is unsigned.
|
||||
if (AddrInfo[0].SgprParts.size() == 1 && isUInt<32>(GEPInfo.Imm) &&
|
||||
GEPInfo.Imm != 0) {
|
||||
// If we make it this far we have a load with a 32-bit immediate offset.
|
||||
// It is OK to select this using a sgpr offset, because we have already
|
||||
// failed trying to select this load into one of the _IMM variants since
|
||||
// the _IMM Patterns are considered before the _SGPR patterns.
|
||||
Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
|
||||
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
|
||||
.addImm(GEPInfo.Imm);
|
||||
return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
|
||||
[=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}};
|
||||
}
|
||||
|
||||
if (AddrInfo[0].SgprParts.size() == 2 && GEPInfo.Imm == 0) {
|
||||
if (Register OffsetReg =
|
||||
matchZeroExtendFromS32(*MRI, GEPInfo.SgprParts[1])) {
|
||||
return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
|
||||
[=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}};
|
||||
}
|
||||
}
|
||||
|
||||
return None;
|
||||
}
|
||||
|
||||
std::pair<Register, int>
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
define amdgpu_kernel void @smrd_imm(i32 addrspace(4)* %const0) { ret void }
|
||||
define amdgpu_kernel void @smrd_wide() { ret void }
|
||||
define amdgpu_kernel void @constant_address_positive() { ret void }
|
||||
define amdgpu_kernel void @smrd_sgpr() { ret void }
|
||||
...
|
||||
---
|
||||
|
||||
|
@ -210,3 +211,24 @@ body: |
|
|||
%3:sgpr(s32) = G_LOAD %2 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
|
||||
S_ENDPGM 0, implicit %3
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
# Test a load with a register offset.
|
||||
# GCN-LABEL: name: smrd_sgpr{{$}}
|
||||
# GCN: S_LOAD_DWORD_SGPR %0, %1, 0
|
||||
|
||||
name: smrd_sgpr
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1, $sgpr2
|
||||
%0:sgpr(p4) = COPY $sgpr0_sgpr1
|
||||
%1:sgpr(s32) = COPY $sgpr2
|
||||
%2:sgpr(s64) = G_ZEXT %1:sgpr(s32)
|
||||
%4:sgpr(p4) = G_PTR_ADD %0, %2
|
||||
%5:sgpr(s32) = G_LOAD %4 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
|
||||
S_ENDPGM 0, implicit %5
|
||||
...
|
||||
|
|
|
@ -2,10 +2,14 @@
|
|||
; from a register.
|
||||
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -global-isel -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GISEL %s
|
||||
|
||||
; GCN: %[[OFFSET:[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer
|
||||
; GCN: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0 :: (invariant load (s128) from %ir.13, addrspace 4)
|
||||
|
||||
; GISEL: $[[OFFSET:.*]] = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer
|
||||
; GISEL: S_LOAD_DWORDX4_SGPR killed renamable {{.*}}, killed renamable $[[OFFSET]], 0 :: (invariant load (<4 x s32>) from {{.*}}, addrspace 4)
|
||||
|
||||
define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %resNode0, i32 inreg %resNode1, <3 x i32> inreg %2, i32 inreg %3, <3 x i32> %4) local_unnamed_addr #2 {
|
||||
.entry:
|
||||
%5 = call i64 @llvm.amdgcn.s.getpc() #3
|
||||
|
|
Loading…
Reference in New Issue