forked from OSchip/llvm-project
AMDGPU/GlobalISel: Fix crash on wide constant load with VGPR pointer
This was ignoring the register bank of the input pointer, and isUniformMMO seems overly aggressive. This will now conservatively assume a VGPR in cases where the incoming bank hasn't been determined yet (i.e. is from a loop phi). llvm-svn: 374255
This commit is contained in:
parent
001826835a
commit
85dfa82302
|
@ -323,6 +323,8 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
|
|||
}
|
||||
}
|
||||
|
||||
// FIXME: Returns uniform if there's no source value information. This is
|
||||
// probably wrong.
|
||||
static bool isInstrUniformNonExtLoadAlign4(const MachineInstr &MI) {
|
||||
if (!MI.hasOneMemOperand())
|
||||
return false;
|
||||
|
@ -1047,8 +1049,13 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
|
|||
SmallVector<unsigned, 1> SrcRegs(OpdMapper.getVRegs(1));
|
||||
|
||||
// If the pointer is an SGPR, we have nothing to do.
|
||||
if (SrcRegs.empty())
|
||||
if (SrcRegs.empty()) {
|
||||
Register PtrReg = MI.getOperand(1).getReg();
|
||||
const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI);
|
||||
if (PtrBank == &AMDGPU::SGPRRegBank)
|
||||
return false;
|
||||
SrcRegs.push_back(PtrReg);
|
||||
}
|
||||
|
||||
assert(LoadSize % MaxNonSmrdLoadSize == 0);
|
||||
|
||||
|
@ -2025,7 +2032,7 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
|
|||
|
||||
const MachineFunction &MF = *MI.getParent()->getParent();
|
||||
const MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
|
||||
SmallVector<const ValueMapping*, 2> OpdsMapping(2);
|
||||
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
|
||||
LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
|
||||
Register PtrReg = MI.getOperand(1).getReg();
|
||||
|
@ -2036,7 +2043,10 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
|
|||
const ValueMapping *ValMapping;
|
||||
const ValueMapping *PtrMapping;
|
||||
|
||||
if ((AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
|
||||
const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI);
|
||||
|
||||
if (PtrBank == &AMDGPU::SGPRRegBank &&
|
||||
(AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
|
||||
AS != AMDGPUAS::PRIVATE_ADDRESS) &&
|
||||
isInstrUniformNonExtLoadAlign4(MI)) {
|
||||
// We have a uniform instruction so we want to use an SMRD load
|
||||
|
|
|
@ -69,6 +69,8 @@
|
|||
define amdgpu_kernel void @load_constant_i32_uniform_align2() {ret void}
|
||||
define amdgpu_kernel void @load_constant_i32_uniform_align1() {ret void}
|
||||
define amdgpu_kernel void @load_private_uniform_sgpr_i32() {ret void}
|
||||
define amdgpu_kernel void @load_constant_v8i32_vgpr_crash() { ret void }
|
||||
define amdgpu_kernel void @load_constant_v8i32_vgpr_crash_loop_phi() { ret void }
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
attributes #0 = { nounwind readnone }
|
||||
|
@ -652,3 +654,47 @@ body: |
|
|||
%0:_(p5) = COPY $sgpr0
|
||||
%1:_(s32) = G_LOAD %0 :: (load 4, addrspace 5, align 4)
|
||||
...
|
||||
|
||||
---
|
||||
name: load_constant_v8i32_vgpr_crash
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; CHECK-LABEL: name: load_constant_v8i32_vgpr_crash
|
||||
; CHECK: %0:vgpr(p4) = COPY $vgpr0_vgpr1
|
||||
; CHECK: vgpr(<4 x s32>) = G_LOAD %0(p4)
|
||||
; CHECK: vgpr(<4 x s32>) = G_LOAD
|
||||
; CHECK: G_CONCAT_VECTORS
|
||||
%0:_(p4) = COPY $vgpr0_vgpr1
|
||||
%1:_(<8 x s32>) = G_LOAD %0 :: (load 32, addrspace 4)
|
||||
...
|
||||
|
||||
---
|
||||
name: load_constant_v8i32_vgpr_crash_loop_phi
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
|
||||
|
||||
; CHECK-LABEL: name: load_constant_v8i32_vgpr_crash_loop_phi
|
||||
; CHECK: G_PHI
|
||||
; CHECK: vgpr(<4 x s32>) = G_LOAD
|
||||
; CHECK: vgpr(<4 x s32>) = G_LOAD
|
||||
; CHECK: G_CONCAT_VECTORS
|
||||
|
||||
%0:_(p4) = COPY $sgpr0_sgpr1
|
||||
%1:_(p4) = COPY $sgpr2_sgpr3
|
||||
G_BR %bb.1
|
||||
|
||||
bb.1:
|
||||
%2:_(p4) = G_PHI %0, %bb.0, %4, %bb.1
|
||||
%3:_(<8 x s32>) = G_LOAD %2 :: (load 32, addrspace 4)
|
||||
%4:_(p4) = COPY %1
|
||||
G_BR %bb.1
|
||||
...
|
||||
|
|
Loading…
Reference in New Issue