forked from OSchip/llvm-project
AMDGPU/GlobalISel: Fix RegBankSelect for unaligned, uniform constant loads
llvm-svn: 371416
This commit is contained in:
parent
9ede7c0395
commit
d8409b178e
|
@ -320,12 +320,13 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool isInstrUniformNonExtLoad(const MachineInstr &MI) {
|
static bool isInstrUniformNonExtLoadAlign4(const MachineInstr &MI) {
|
||||||
if (!MI.hasOneMemOperand())
|
if (!MI.hasOneMemOperand())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
const MachineMemOperand *MMO = *MI.memoperands_begin();
|
const MachineMemOperand *MMO = *MI.memoperands_begin();
|
||||||
return MMO->getSize() >= 4 && AMDGPUInstrInfo::isUniformMMO(MMO);
|
return MMO->getSize() >= 4 && MMO->getAlignment() >= 4 &&
|
||||||
|
AMDGPUInstrInfo::isUniformMMO(MMO);
|
||||||
}
|
}
|
||||||
|
|
||||||
RegisterBankInfo::InstructionMappings
|
RegisterBankInfo::InstructionMappings
|
||||||
|
@ -426,7 +427,7 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
|
||||||
unsigned PtrSize = PtrTy.getSizeInBits();
|
unsigned PtrSize = PtrTy.getSizeInBits();
|
||||||
unsigned AS = PtrTy.getAddressSpace();
|
unsigned AS = PtrTy.getAddressSpace();
|
||||||
LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
|
LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
|
||||||
if (isInstrUniformNonExtLoad(MI) &&
|
if (isInstrUniformNonExtLoadAlign4(MI) &&
|
||||||
(AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) {
|
(AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) {
|
||||||
const InstructionMapping &SSMapping = getInstructionMapping(
|
const InstructionMapping &SSMapping = getInstructionMapping(
|
||||||
1, 1, getOperandsMapping(
|
1, 1, getOperandsMapping(
|
||||||
|
@ -1482,7 +1483,7 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
|
||||||
const ValueMapping *ValMapping;
|
const ValueMapping *ValMapping;
|
||||||
const ValueMapping *PtrMapping;
|
const ValueMapping *PtrMapping;
|
||||||
|
|
||||||
if (isInstrUniformNonExtLoad(MI) &&
|
if (isInstrUniformNonExtLoadAlign4(MI) &&
|
||||||
(AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) {
|
(AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) {
|
||||||
// We have a uniform instruction so we want to use an SMRD load
|
// We have a uniform instruction so we want to use an SMRD load
|
||||||
ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
|
ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
# RUN: llc -march=amdgcn -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
|
||||||
# RUN: llc -march=amdgcn -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
|
||||||
|
|
||||||
--- |
|
--- |
|
||||||
define amdgpu_kernel void @load_global_v8i32_non_uniform(<8 x i32> addrspace(1)* %in) {
|
define amdgpu_kernel void @load_global_v8i32_non_uniform(<8 x i32> addrspace(1)* %in) {
|
||||||
|
@ -65,6 +65,9 @@
|
||||||
define amdgpu_kernel void @extload_global_i8_to_i32_uniform() { ret void }
|
define amdgpu_kernel void @extload_global_i8_to_i32_uniform() { ret void }
|
||||||
define amdgpu_kernel void @extload_constant_i16_to_i32_uniform() { ret void }
|
define amdgpu_kernel void @extload_constant_i16_to_i32_uniform() { ret void }
|
||||||
define amdgpu_kernel void @extload_global_i16_to_i32_uniform() { ret void }
|
define amdgpu_kernel void @extload_global_i16_to_i32_uniform() { ret void }
|
||||||
|
define amdgpu_kernel void @load_constant_i32_uniform_align4() {ret void}
|
||||||
|
define amdgpu_kernel void @load_constant_i32_uniform_align2() {ret void}
|
||||||
|
define amdgpu_kernel void @load_constant_i32_uniform_align1() {ret void}
|
||||||
|
|
||||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||||
attributes #0 = { nounwind readnone }
|
attributes #0 = { nounwind readnone }
|
||||||
|
@ -586,3 +589,49 @@ body: |
|
||||||
%0:_(p4) = COPY $sgpr0_sgpr1
|
%0:_(p4) = COPY $sgpr0_sgpr1
|
||||||
%1:_(s32) = G_LOAD %0 :: (load 2, addrspace 1, align 2)
|
%1:_(s32) = G_LOAD %0 :: (load 2, addrspace 1, align 2)
|
||||||
...
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: load_constant_i32_uniform_align4
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0_sgpr1
|
||||||
|
; CHECK-LABEL: name: load_constant_i32_uniform_align4
|
||||||
|
; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1
|
||||||
|
; CHECK: %1:sgpr(s32) = G_LOAD %0(p4) :: (load 4, addrspace 4)
|
||||||
|
%0:_(p4) = COPY $sgpr0_sgpr1
|
||||||
|
%1:_(s32) = G_LOAD %0 :: (load 4, addrspace 4, align 4)
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: load_constant_i32_uniform_align2
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0_sgpr1
|
||||||
|
; CHECK-LABEL: name: load_constant_i32_uniform_align2
|
||||||
|
; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1
|
||||||
|
; CHECK: %2:vgpr(p4) = COPY %0(p4)
|
||||||
|
; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 4, align 2, addrspace 4)
|
||||||
|
|
||||||
|
%0:_(p4) = COPY $sgpr0_sgpr1
|
||||||
|
%1:_(s32) = G_LOAD %0 :: (load 4, addrspace 4, align 2)
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: load_constant_i32_uniform_align1
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0_sgpr1
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: load_constant_i32_uniform_align1
|
||||||
|
; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1
|
||||||
|
; CHECK: %2:vgpr(p4) = COPY %0(p4)
|
||||||
|
; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 4, align 1, addrspace 4)
|
||||||
|
%0:_(p4) = COPY $sgpr0_sgpr1
|
||||||
|
%1:_(s32) = G_LOAD %0 :: (load 4, addrspace 4, align 1)
|
||||||
|
...
|
||||||
|
|
Loading…
Reference in New Issue