forked from OSchip/llvm-project
AMDGPU/GlobalISel: Fix reg bank for uniform LDS loads
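The pointer operand of an LDS (local or region address space) load must always be in a VGPR, so do not select the SGPR mapping for such loads even when the load is uniform. Also stop hard-coding the pointer size to 64 bits; use the actual size of the pointer type instead. llvm-svn: 371411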
This commit is contained in:
parent
3c8644666c
commit
fdb7030117
|
@ -422,21 +422,24 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
|
|||
}
|
||||
case TargetOpcode::G_LOAD: {
|
||||
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
|
||||
LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
|
||||
unsigned PtrSize = PtrTy.getSizeInBits();
|
||||
unsigned AS = PtrTy.getAddressSpace();
|
||||
LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
|
||||
// FIXME: Should we be hard coding the size for these mappings?
|
||||
if (isInstrUniform(MI)) {
|
||||
if (isInstrUniform(MI) &&
|
||||
(AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) {
|
||||
const InstructionMapping &SSMapping = getInstructionMapping(
|
||||
1, 1, getOperandsMapping(
|
||||
{AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
|
||||
AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
|
||||
AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize)}),
|
||||
2); // Num Operands
|
||||
AltMappings.push_back(&SSMapping);
|
||||
}
|
||||
|
||||
const InstructionMapping &VVMapping = getInstructionMapping(
|
||||
2, 1, getOperandsMapping(
|
||||
{AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy),
|
||||
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
|
||||
{AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy),
|
||||
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
|
||||
2); // Num Operands
|
||||
AltMappings.push_back(&VVMapping);
|
||||
|
||||
|
@ -1471,18 +1474,21 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
|
|||
SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
|
||||
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
|
||||
LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
|
||||
unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
|
||||
Register PtrReg = MI.getOperand(1).getReg();
|
||||
LLT PtrTy = MRI.getType(PtrReg);
|
||||
unsigned AS = PtrTy.getAddressSpace();
|
||||
unsigned PtrSize = PtrTy.getSizeInBits();
|
||||
|
||||
const ValueMapping *ValMapping;
|
||||
const ValueMapping *PtrMapping;
|
||||
|
||||
if (isInstrUniform(MI)) {
|
||||
if (isInstrUniform(MI) &&
|
||||
(AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) {
|
||||
// We have a uniform instruction so we want to use an SMRD load
|
||||
ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
|
||||
PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
|
||||
} else {
|
||||
ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy);
|
||||
// FIXME: What would happen if we used SGPRRegBankID here?
|
||||
PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
# RUN: llc -march=amdgcn -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
|
||||
|
||||
# REQUIRES: global-isel
|
||||
|
||||
--- |
|
||||
define amdgpu_kernel void @load_global_v8i32_non_uniform(<8 x i32> addrspace(1)* %in) {
|
||||
%tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
@ -56,10 +54,14 @@
|
|||
%tmp2 = load <8 x i64>, <8 x i64> addrspace(4)* %constant.not.uniform.v8i64
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @load_constant_v8i32_uniform() {ret void}
|
||||
define amdgpu_kernel void @load_constant_v4i64_uniform() {ret void}
|
||||
define amdgpu_kernel void @load_constant_v16i32_uniform() {ret void}
|
||||
define amdgpu_kernel void @load_constant_v8i64_uniform() {ret void}
|
||||
define amdgpu_kernel void @load_local_uniform() { ret void }
|
||||
define amdgpu_kernel void @load_region_uniform() { ret void }
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
attributes #0 = { nounwind readnone }
|
||||
...
|
||||
|
@ -486,3 +488,34 @@ body: |
|
|||
%0:_(p4) = COPY $sgpr0_sgpr1
|
||||
%1:_(<8 x s64>) = G_LOAD %0 :: (load 64, addrspace 4)
|
||||
...
|
||||
|
||||
---
|
||||
name : load_local_uniform
|
||||
legalized: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0
|
||||
; CHECK-LABEL: load_local_uniform
|
||||
; CHECK: %0:sgpr(p3) = COPY $sgpr0
|
||||
; CHECK: %2:vgpr(p3) = COPY %0(p3)
|
||||
; CHECK: %1:vgpr(s32) = G_LOAD %2(p3) :: (load 4, addrspace 3)
|
||||
|
||||
%0:_(p3) = COPY $sgpr0
|
||||
%1:_(s32) = G_LOAD %0 :: (load 4, addrspace 3)
|
||||
|
||||
...
|
||||
---
|
||||
name : load_region_uniform
|
||||
legalized: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0
|
||||
; CHECK-LABEL: load_region_uniform
|
||||
; CHECK: %0:sgpr(p3) = COPY $sgpr0
|
||||
; CHECK: %2:vgpr(p3) = COPY %0(p3)
|
||||
; CHECK: %1:vgpr(s32) = G_LOAD %2(p3) :: (load 4, addrspace 5)
|
||||
|
||||
%0:_(p3) = COPY $sgpr0
|
||||
%1:_(s32) = G_LOAD %0 :: (load 4, addrspace 5)
|
||||
|
||||
...
|
||||
|
|
Loading…
Reference in New Issue