forked from OSchip/llvm-project
AMDGPU/GlobalISel: Handle LDS with relocations case
This commit is contained in:
parent
ab2300bc15
commit
96352e0a1b
|
@ -1627,7 +1627,8 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
|
bool AMDGPUInstructionSelector::selectG_FRAME_INDEX_GLOBAL_VALUE(
|
||||||
|
MachineInstr &I) const {
|
||||||
Register DstReg = I.getOperand(0).getReg();
|
Register DstReg = I.getOperand(0).getReg();
|
||||||
const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
|
const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
|
||||||
const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
|
const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
|
||||||
|
@ -1961,7 +1962,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
|
||||||
case TargetOpcode::G_BRCOND:
|
case TargetOpcode::G_BRCOND:
|
||||||
return selectG_BRCOND(I);
|
return selectG_BRCOND(I);
|
||||||
case TargetOpcode::G_FRAME_INDEX:
|
case TargetOpcode::G_FRAME_INDEX:
|
||||||
return selectG_FRAME_INDEX(I);
|
case TargetOpcode::G_GLOBAL_VALUE:
|
||||||
|
return selectG_FRAME_INDEX_GLOBAL_VALUE(I);
|
||||||
case TargetOpcode::G_PTR_MASK:
|
case TargetOpcode::G_PTR_MASK:
|
||||||
return selectG_PTR_MASK(I);
|
return selectG_PTR_MASK(I);
|
||||||
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
|
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
|
||||||
|
|
|
@ -119,7 +119,7 @@ private:
|
||||||
bool selectG_STORE(MachineInstr &I) const;
|
bool selectG_STORE(MachineInstr &I) const;
|
||||||
bool selectG_SELECT(MachineInstr &I) const;
|
bool selectG_SELECT(MachineInstr &I) const;
|
||||||
bool selectG_BRCOND(MachineInstr &I) const;
|
bool selectG_BRCOND(MachineInstr &I) const;
|
||||||
bool selectG_FRAME_INDEX(MachineInstr &I) const;
|
bool selectG_FRAME_INDEX_GLOBAL_VALUE(MachineInstr &I) const;
|
||||||
bool selectG_PTR_MASK(MachineInstr &I) const;
|
bool selectG_PTR_MASK(MachineInstr &I) const;
|
||||||
bool selectG_EXTRACT_VECTOR_ELT(MachineInstr &I) const;
|
bool selectG_EXTRACT_VECTOR_ELT(MachineInstr &I) const;
|
||||||
bool selectG_INSERT_VECTOR_ELT(MachineInstr &I) const;
|
bool selectG_INSERT_VECTOR_ELT(MachineInstr &I) const;
|
||||||
|
|
|
@ -1734,6 +1734,12 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
|
||||||
|
|
||||||
// TODO: We could emit code to handle the initialization somewhere.
|
// TODO: We could emit code to handle the initialization somewhere.
|
||||||
if (!AMDGPUTargetLowering::hasDefinedInitializer(GV)) {
|
if (!AMDGPUTargetLowering::hasDefinedInitializer(GV)) {
|
||||||
|
const SITargetLowering *TLI = ST.getTargetLowering();
|
||||||
|
if (!TLI->shouldUseLDSConstAddress(GV)) {
|
||||||
|
MI.getOperand(1).setTargetFlags(SIInstrInfo::MO_ABS32_LO);
|
||||||
|
return true; // Leave in place;
|
||||||
|
}
|
||||||
|
|
||||||
B.buildConstant(DstReg, MFI->allocateLDSGlobal(B.getDataLayout(), *GV));
|
B.buildConstant(DstReg, MFI->allocateLDSGlobal(B.getDataLayout(), *GV));
|
||||||
MI.eraseFromParent();
|
MI.eraseFromParent();
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -4411,6 +4411,14 @@ bool SITargetLowering::shouldEmitPCReloc(const GlobalValue *GV) const {
|
||||||
return !shouldEmitFixup(GV) && !shouldEmitGOTReloc(GV);
|
return !shouldEmitFixup(GV) && !shouldEmitGOTReloc(GV);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool SITargetLowering::shouldUseLDSConstAddress(const GlobalValue *GV) const {
|
||||||
|
if (!GV->hasExternalLinkage())
|
||||||
|
return true;
|
||||||
|
|
||||||
|
const auto OS = getTargetMachine().getTargetTriple().getOS();
|
||||||
|
return OS == Triple::AMDHSA || OS == Triple::AMDPAL;
|
||||||
|
}
|
||||||
|
|
||||||
/// This transforms the control flow intrinsics to get the branch destination as
|
/// This transforms the control flow intrinsics to get the branch destination as
|
||||||
/// last parameter, also switches branch target with BR if the need arises
|
/// last parameter, also switches branch target with BR if the need arises
|
||||||
SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
|
SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
|
||||||
|
@ -5046,9 +5054,7 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
|
||||||
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
|
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
|
||||||
const GlobalValue *GV = GSD->getGlobal();
|
const GlobalValue *GV = GSD->getGlobal();
|
||||||
if ((GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
|
if ((GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
|
||||||
(!GV->hasExternalLinkage() ||
|
shouldUseLDSConstAddress(GV)) ||
|
||||||
getTargetMachine().getTargetTriple().getOS() == Triple::AMDHSA ||
|
|
||||||
getTargetMachine().getTargetTriple().getOS() == Triple::AMDPAL)) ||
|
|
||||||
GSD->getAddressSpace() == AMDGPUAS::REGION_ADDRESS ||
|
GSD->getAddressSpace() == AMDGPUAS::REGION_ADDRESS ||
|
||||||
GSD->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
|
GSD->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
|
||||||
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
|
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
|
||||||
|
|
|
@ -199,6 +199,10 @@ public:
|
||||||
/// global value \p GV, false otherwise.
|
/// global value \p GV, false otherwise.
|
||||||
bool shouldEmitPCReloc(const GlobalValue *GV) const;
|
bool shouldEmitPCReloc(const GlobalValue *GV) const;
|
||||||
|
|
||||||
|
/// \returns true if this should use a literal constant for an LDS address,
|
||||||
|
/// and not emit a relocation for an LDS global.
|
||||||
|
bool shouldUseLDSConstAddress(const GlobalValue *GV) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
|
// Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
|
||||||
// three offsets (voffset, soffset and instoffset) into the SDValue[3] array
|
// three offsets (voffset, soffset and instoffset) into the SDValue[3] array
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s
|
||||||
|
; FIXME: Merge with DAG test
|
||||||
|
|
||||||
|
@lds.external = external unnamed_addr addrspace(3) global [0 x i32]
|
||||||
|
@lds.defined = unnamed_addr addrspace(3) global [8 x i32] undef, align 8
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}test_basic:
|
||||||
|
; GCN: s_add_u32 s0, lds.defined@abs32@lo, s0 ; encoding: [0xff,0x00,0x00,0x80,A,A,A,A]
|
||||||
|
; GCN: v_add_u32_e32 v0, lds.external@abs32@lo, v0 ; encoding: [0xff,0x00,0x00,0x68,A,A,A,A]
|
||||||
|
|
||||||
|
; GCN: .globl lds.external
|
||||||
|
; GCN: .amdgpu_lds lds.external, 0, 4
|
||||||
|
; GCN: .globl lds.defined
|
||||||
|
; GCN: .amdgpu_lds lds.defined, 32, 8
|
||||||
|
define amdgpu_gs float @test_basic(i32 inreg %wave, i32 %arg1) #0 {
|
||||||
|
main_body:
|
||||||
|
%gep0 = getelementptr [0 x i32], [0 x i32] addrspace(3)* @lds.external, i32 0, i32 %arg1
|
||||||
|
%tmp = load i32, i32 addrspace(3)* %gep0
|
||||||
|
|
||||||
|
%gep1 = getelementptr [8 x i32], [8 x i32] addrspace(3)* @lds.defined, i32 0, i32 %wave
|
||||||
|
store i32 123, i32 addrspace(3)* %gep1
|
||||||
|
|
||||||
|
%r = bitcast i32 %tmp to float
|
||||||
|
ret float %r
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { "no-signed-zeros-fp-math"="true" }
|
||||||
|
attributes #4 = { convergent nounwind readnone }
|
|
@ -1,5 +1,3 @@
|
||||||
; RUN: not llc -global-isel -march=amdgcn -mcpu=tonga < %S/../lds-zero-initializer.ll 2>&1 | FileCheck %s
|
; RUN: not llc -global-isel -march=amdgcn -mcpu=tonga < %S/../lds-zero-initializer.ll 2>&1 | FileCheck %s
|
||||||
|
|
||||||
; FIXME: Select should succeed
|
|
||||||
; CHECK: error: <unknown>:0:0: in function load_zeroinit_lds_global void (i32 addrspace(1)*, i1): unsupported initializer for address space
|
; CHECK: error: <unknown>:0:0: in function load_zeroinit_lds_global void (i32 addrspace(1)*, i1): unsupported initializer for address space
|
||||||
; CHECK: LLVM ERROR: cannot select: %16:sreg_32(p3) = G_GLOBAL_VALUE @lds (in function: load_zeroinit_lds_global)
|
|
||||||
|
|
|
@ -1174,7 +1174,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
@lds0 = addrspace(3) global [512 x i32] undef
|
@lds0 = internal addrspace(3) global [512 x i32] undef
|
||||||
|
|
||||||
define amdgpu_kernel void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
define amdgpu_kernel void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||||
; CI-LABEL: atomic_dec_shl_base_lds_0:
|
; CI-LABEL: atomic_dec_shl_base_lds_0:
|
||||||
|
@ -1759,7 +1759,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspa
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
@lds1 = addrspace(3) global [512 x i64] undef, align 8
|
@lds1 = internal addrspace(3) global [512 x i64] undef, align 8
|
||||||
|
|
||||||
define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||||
; CI-LABEL: atomic_dec_shl_base_lds_0_i64:
|
; CI-LABEL: atomic_dec_shl_base_lds_0_i64:
|
||||||
|
|
|
@ -516,7 +516,7 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspa
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
@lds0 = addrspace(3) global [512 x i32] undef, align 4
|
@lds0 = internal addrspace(3) global [512 x i32] undef, align 4
|
||||||
|
|
||||||
define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||||
; CI-LABEL: atomic_inc_shl_base_lds_0_i32:
|
; CI-LABEL: atomic_inc_shl_base_lds_0_i32:
|
||||||
|
@ -1331,7 +1331,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
@lds1 = addrspace(3) global [512 x i64] undef, align 8
|
@lds1 = internal addrspace(3) global [512 x i64] undef, align 8
|
||||||
|
|
||||||
define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||||
; CI-LABEL: atomic_inc_shl_base_lds_0_i64:
|
; CI-LABEL: atomic_inc_shl_base_lds_0_i64:
|
||||||
|
|
|
@ -47,10 +47,8 @@ main_body:
|
||||||
%gep0 = getelementptr [0 x i32], [0 x i32] addrspace(3)* @lds.external, i32 0, i32 %arg1
|
%gep0 = getelementptr [0 x i32], [0 x i32] addrspace(3)* @lds.external, i32 0, i32 %arg1
|
||||||
%tmp = load i32, i32 addrspace(3)* %gep0
|
%tmp = load i32, i32 addrspace(3)* %gep0
|
||||||
|
|
||||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %tmp, i32 0, i32 0)
|
|
||||||
%mask.32 = trunc i64 %mask to i32
|
|
||||||
%gep1 = getelementptr [8 x i32], [8 x i32] addrspace(3)* @lds.defined, i32 0, i32 %wave
|
%gep1 = getelementptr [8 x i32], [8 x i32] addrspace(3)* @lds.defined, i32 0, i32 %wave
|
||||||
store i32 %mask.32, i32 addrspace(3)* %gep1
|
store i32 123, i32 addrspace(3)* %gep1
|
||||||
|
|
||||||
%r = bitcast i32 %tmp to float
|
%r = bitcast i32 %tmp to float
|
||||||
ret float %r
|
ret float %r
|
||||||
|
|
Loading…
Reference in New Issue