forked from OSchip/llvm-project
[AMDGPU] Support emitting GOT relocations for function calls
Differential Revision: https://reviews.llvm.org/D57416
llvm-svn: 353083
This commit is contained in:
parent
70560a0a2c
commit
d19d197221
|
@ -69,8 +69,6 @@ def AMDGPUAddeSubeOp : SDTypeProfile<2, 3,
|
|||
[SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisVT<0, i32>, SDTCisVT<1, i1>, SDTCisVT<4, i1>]
|
||||
>;
|
||||
|
||||
def SDT_AMDGPUTCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDGPU DAG Nodes
|
||||
//
|
||||
|
@ -95,7 +93,8 @@ def AMDGPUcall : SDNode<"AMDGPUISD::CALL",
|
|||
SDNPVariadic]
|
||||
>;
|
||||
|
||||
def AMDGPUtc_return: SDNode<"AMDGPUISD::TC_RETURN", SDT_AMDGPUTCRET,
|
||||
def AMDGPUtc_return: SDNode<"AMDGPUISD::TC_RETURN",
|
||||
SDTypeProfile<0, 3, [SDTCisPtrTy<0>]>,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
|
||||
>;
|
||||
|
||||
|
|
|
@ -2711,6 +2711,11 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
|
|||
std::vector<SDValue> Ops;
|
||||
Ops.push_back(Chain);
|
||||
Ops.push_back(Callee);
|
||||
// Add a redundant copy of the callee global which will not be legalized, as
|
||||
// we need direct access to the callee later.
|
||||
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Callee);
|
||||
const GlobalValue *GV = GSD->getGlobal();
|
||||
Ops.push_back(DAG.getTargetGlobalAddress(GV, DL, MVT::i64));
|
||||
|
||||
if (IsTailCall) {
|
||||
// Each tail call may have to adjust the stack by a different amount, so
|
||||
|
@ -3474,34 +3479,16 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
|
|||
.addReg(Info->getFrameOffsetReg(), RegState::Implicit);
|
||||
return BB;
|
||||
}
|
||||
case AMDGPU::SI_CALL_ISEL:
|
||||
case AMDGPU::SI_TCRETURN_ISEL: {
|
||||
case AMDGPU::SI_CALL_ISEL: {
|
||||
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
|
||||
const DebugLoc &DL = MI.getDebugLoc();
|
||||
|
||||
unsigned ReturnAddrReg = TII->getRegisterInfo().getReturnAddressReg(*MF);
|
||||
|
||||
MachineRegisterInfo &MRI = MF->getRegInfo();
|
||||
unsigned GlobalAddrReg = MI.getOperand(0).getReg();
|
||||
MachineInstr *PCRel = MRI.getVRegDef(GlobalAddrReg);
|
||||
assert(PCRel->getOpcode() == AMDGPU::SI_PC_ADD_REL_OFFSET);
|
||||
|
||||
const GlobalValue *G = PCRel->getOperand(1).getGlobal();
|
||||
|
||||
MachineInstrBuilder MIB;
|
||||
if (MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
|
||||
MIB = BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_CALL), ReturnAddrReg)
|
||||
.add(MI.getOperand(0))
|
||||
.addGlobalAddress(G);
|
||||
} else {
|
||||
MIB = BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_TCRETURN))
|
||||
.add(MI.getOperand(0))
|
||||
.addGlobalAddress(G);
|
||||
MIB = BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_CALL), ReturnAddrReg);
|
||||
|
||||
// There is an additional imm operand for tcreturn, but it should be in the
|
||||
// right place already.
|
||||
}
|
||||
|
||||
for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
|
||||
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
|
||||
MIB.add(MI.getOperand(I));
|
||||
|
||||
MIB.cloneMemRefs(MI);
|
||||
|
@ -4008,7 +3995,10 @@ bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
|
|||
}
|
||||
|
||||
bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
|
||||
return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
|
||||
// FIXME: Either avoid relying on address space here or change the default
|
||||
// address space for functions to avoid the explicit check.
|
||||
return (GV->getValueType()->isFunctionTy() ||
|
||||
GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
|
||||
GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
|
||||
GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
|
||||
!shouldEmitFixup(GV) &&
|
||||
|
|
|
@ -373,7 +373,8 @@ def SI_RETURN : SPseudoInstSI <
|
|||
// This version is only needed so we can fill in the output register in
|
||||
// the custom inserter.
|
||||
def SI_CALL_ISEL : SPseudoInstSI <
|
||||
(outs), (ins SSrc_b64:$src0), [(AMDGPUcall i64:$src0)]> {
|
||||
(outs), (ins SSrc_b64:$src0, unknown:$callee),
|
||||
[(AMDGPUcall i64:$src0, tglobaladdr:$callee)]> {
|
||||
let Size = 4;
|
||||
let isCall = 1;
|
||||
let SchedRW = [WriteBranch];
|
||||
|
@ -391,20 +392,9 @@ def SI_CALL : SPseudoInstSI <
|
|||
}
|
||||
|
||||
// Tail call handling pseudo
|
||||
def SI_TCRETURN_ISEL : SPseudoInstSI<(outs),
|
||||
(ins SSrc_b64:$src0, i32imm:$fpdiff),
|
||||
[(AMDGPUtc_return i64:$src0, i32:$fpdiff)]> {
|
||||
let isCall = 1;
|
||||
let isTerminator = 1;
|
||||
let isReturn = 1;
|
||||
let isBarrier = 1;
|
||||
let SchedRW = [WriteBranch];
|
||||
let usesCustomInserter = 1;
|
||||
}
|
||||
|
||||
def SI_TCRETURN : SPseudoInstSI <
|
||||
(outs),
|
||||
(ins SSrc_b64:$src0, unknown:$callee, i32imm:$fpdiff)> {
|
||||
def SI_TCRETURN : SPseudoInstSI <(outs),
|
||||
(ins SSrc_b64:$src0, unknown:$callee, i32imm:$fpdiff),
|
||||
[(AMDGPUtc_return i64:$src0, tglobaladdr:$callee, i32:$fpdiff)]> {
|
||||
let Size = 4;
|
||||
let isCall = 1;
|
||||
let isTerminator = 1;
|
||||
|
|
|
@ -0,0 +1,51 @@
|
|||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji < %s | FileCheck %s
|
||||
|
||||
declare void @func(i32 addrspace(1)* %out)
|
||||
|
||||
declare protected void @protected_func(i32 addrspace(1)* %out)
|
||||
|
||||
declare hidden void @hidden_func(i32 addrspace(1)* %out)
|
||||
|
||||
; CHECK-LABEL: call_func:
|
||||
; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
|
||||
; CHECK: s_add_u32 s[[GOT_ADDR_LO:[0-9]+]], s[[PC_LO]], func@gotpcrel32@lo+4
|
||||
; CHECK: s_addc_u32 s[[GOT_ADDR_HI:[0-9]+]], s[[PC_HI]], func@gotpcrel32@hi+4
|
||||
; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOT_ADDR_LO]]:[[GOT_ADDR_HI]]{{\]}}, 0x0
|
||||
; CHECK: s_swappc_b64 s{{\[}}{{[0-9]+:[0-9]+}}{{\]}}, s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}}
|
||||
define amdgpu_kernel void @call_func(i32 addrspace(1)* %out) {
|
||||
call void @func(i32 addrspace(1)* %out)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: call_protected_func:
|
||||
; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
|
||||
; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], protected_func@rel32@lo+4
|
||||
; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], protected_func@rel32@hi+4
|
||||
; CHECK: s_swappc_b64 s{{\[}}{{[0-9]+:[0-9]+}}{{\]}}, s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}}
|
||||
define amdgpu_kernel void @call_protected_func(i32 addrspace(1)* %out) {
|
||||
call void @protected_func(i32 addrspace(1)* %out)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: call_hidden_func:
|
||||
; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
|
||||
; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], hidden_func@rel32@lo+4
|
||||
; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], hidden_func@rel32@hi+4
|
||||
; CHECK: s_swappc_b64 s{{\[}}{{[0-9]+:[0-9]+}}{{\]}}, s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}}
|
||||
define amdgpu_kernel void @call_hidden_func(i32 addrspace(1)* %out) {
|
||||
call void @hidden_func(i32 addrspace(1)* %out)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i64 @funci()
|
||||
|
||||
; CHECK-LABEL: tail_call_func:
|
||||
; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
|
||||
; CHECK: s_add_u32 s[[GOT_ADDR_LO:[0-9]+]], s[[PC_LO]], funci@gotpcrel32@lo+4
|
||||
; CHECK: s_addc_u32 s[[GOT_ADDR_HI:[0-9]+]], s[[PC_HI]], funci@gotpcrel32@hi+4
|
||||
; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOT_ADDR_LO]]:[[GOT_ADDR_HI]]{{\]}}, 0x0
|
||||
; CHECK: s_setpc_b64 s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}}
|
||||
define i64 @tail_call_func() {
|
||||
%ret = tail call i64 @funci()
|
||||
ret i64 %ret
|
||||
}
|
Loading…
Reference in New Issue