forked from OSchip/llvm-project
AMDGPU/GlobalISel: Legalize G_GLOBAL_VALUE
Handle other cases besides LDS. Mostly a straight port of the existing handling, without the intermediate custom nodes. llvm-svn: 373286
This commit is contained in:
parent
4d536bfbea
commit
77ac400117
|
@ -309,7 +309,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
|||
.legalIf(isPointer(0));
|
||||
|
||||
setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
|
||||
getActionDefinitionsBuilder(G_GLOBAL_VALUE).customFor({LocalPtr});
|
||||
getActionDefinitionsBuilder(G_GLOBAL_VALUE)
|
||||
.customFor({LocalPtr, GlobalPtr, ConstantPtr, Constant32Ptr});
|
||||
|
||||
|
||||
auto &FPOpActions = getActionDefinitionsBuilder(
|
||||
|
@ -1509,6 +1510,62 @@ bool AMDGPULegalizerInfo::legalizeSinCos(
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Emit an SI_PC_ADD_REL_OFFSET pseudo that materializes a pc-relative
/// address of \p GV (plus \p Offset) and leave the result in \p DstReg.
///
/// \param DstReg  Register that receives the final pointer value.
/// \param PtrTy   Type of the requested pointer; a 32-bit type makes the
///                result a G_EXTRACT of the 64-bit value at bit offset 0.
/// \param B       Builder used to insert the new instructions.
/// \param GV      Global value whose address is being computed.
/// \param Offset  Offset applied to \p GV; 4 is added to it below.
/// \param GAFlags Target flag for the first symbol operand;
///                SIInstrInfo::MO_NONE selects an immediate 0 as the
///                second operand instead of a second symbol reference.
/// \returns Always true.
bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(
|
||||
Register DstReg, LLT PtrTy,
|
||||
MachineIRBuilder &B, const GlobalValue *GV,
|
||||
unsigned Offset, unsigned GAFlags) const {
|
||||
// In order to support pc-relative addressing, SI_PC_ADD_REL_OFFSET is lowered
|
||||
// to the following code sequence:
|
||||
//
|
||||
// For constant address space:
|
||||
// s_getpc_b64 s[0:1]
|
||||
// s_add_u32 s0, s0, $symbol
|
||||
// s_addc_u32 s1, s1, 0
|
||||
//
|
||||
// s_getpc_b64 returns the address of the s_add_u32 instruction and then
|
||||
// a fixup or relocation is emitted to replace $symbol with a literal
|
||||
// constant, which is a pc-relative offset from the encoding of the $symbol
|
||||
// operand to the global variable.
|
||||
//
|
||||
// For global address space:
|
||||
// s_getpc_b64 s[0:1]
|
||||
// s_add_u32 s0, s0, $symbol@{gotpc}rel32@lo
|
||||
// s_addc_u32 s1, s1, $symbol@{gotpc}rel32@hi
|
||||
//
|
||||
// s_getpc_b64 returns the address of the s_add_u32 instruction and then
|
||||
// fixups or relocations are emitted to replace $symbol@*@lo and
|
||||
// $symbol@*@hi with lower 32 bits and higher 32 bits of a literal constant,
|
||||
// which is a 64-bit pc-relative offset from the encoding of the $symbol
|
||||
// operand to the global variable.
|
||||
//
|
||||
// What we want here is an offset from the value returned by s_getpc
|
||||
// (which is the address of the s_add_u32 instruction) to the global
|
||||
// variable, but since the encoding of $symbol starts 4 bytes after the start
|
||||
// of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
|
||||
// small. This requires us to add 4 to the global variable offset in order to
|
||||
// compute the correct address.
|
||||
|
||||
|
||||
LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
|
||||
|
||||
// The pseudo's result register is given a 64-bit register class below, so a
// 32-bit destination cannot be defined directly: compute into a fresh 64-bit
// constant-address-space register and extract from it at the end.
Register PCReg = PtrTy.getSizeInBits() != 32 ? DstReg :
|
||||
B.getMRI()->createGenericVirtualRegister(ConstPtrTy);
|
||||
|
||||
MachineInstrBuilder MIB = B.buildInstr(AMDGPU::SI_PC_ADD_REL_OFFSET)
|
||||
.addDef(PCReg);
|
||||
|
||||
// First symbol operand; the +4 is the correction explained in the comment at
// the top of this function.
MIB.addGlobalAddress(GV, Offset + 4, GAFlags);
|
||||
// Second operand: a plain 0 when no relocation flag was requested, otherwise
// a second reference to the symbol.
if (GAFlags == SIInstrInfo::MO_NONE)
|
||||
MIB.addImm(0);
|
||||
else
|
||||
// NOTE(review): GAFlags + 1 presumably selects the "hi" counterpart of the
// "lo" flag passed in (the test expectations show *-lo / *-hi operand pairs);
// this depends on the flag enum ordering -- confirm in SIInstrInfo.h.
MIB.addGlobalAddress(GV, Offset + 4, GAFlags + 1);
|
||||
|
||||
// The pseudo is a target instruction, so its result register is assigned a
// concrete register class here.
B.getMRI()->setRegClass(PCReg, &AMDGPU::SReg_64RegClass);
|
||||
|
||||
// For a 32-bit pointer type, the result is the part of the 64-bit value
// starting at bit offset 0.
if (PtrTy.getSizeInBits() == 32)
|
||||
B.buildExtract(DstReg, PCReg, 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPULegalizerInfo::legalizeGlobalValue(
|
||||
MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
MachineIRBuilder &B) const {
|
||||
|
@ -1519,10 +1576,9 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
|
|||
const GlobalValue *GV = MI.getOperand(1).getGlobal();
|
||||
MachineFunction &MF = B.getMF();
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
B.setInstr(MI);
|
||||
|
||||
if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
|
||||
B.setInstr(MI);
|
||||
|
||||
if (!MFI->isEntryFunction()) {
|
||||
const Function &Fn = MF.getFunction();
|
||||
DiagnosticInfoUnsupported BadLDSDecl(
|
||||
|
@ -1536,13 +1592,47 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
|
|||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
} else
|
||||
return false;
|
||||
|
||||
const Function &Fn = MF.getFunction();
|
||||
DiagnosticInfoUnsupported BadInit(
|
||||
Fn, "unsupported initializer for address space", MI.getDebugLoc());
|
||||
Fn.getContext().diagnose(BadInit);
|
||||
const Function &Fn = MF.getFunction();
|
||||
DiagnosticInfoUnsupported BadInit(
|
||||
Fn, "unsupported initializer for address space", MI.getDebugLoc());
|
||||
Fn.getContext().diagnose(BadInit);
|
||||
return true;
|
||||
}
|
||||
|
||||
const SITargetLowering *TLI = ST.getTargetLowering();
|
||||
|
||||
if (TLI->shouldEmitFixup(GV)) {
|
||||
buildPCRelGlobalAddress(DstReg, Ty, B, GV, 0);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (TLI->shouldEmitPCReloc(GV)) {
|
||||
buildPCRelGlobalAddress(DstReg, Ty, B, GV, 0, SIInstrInfo::MO_REL32);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
|
||||
Register GOTAddr = MRI.createGenericVirtualRegister(PtrTy);
|
||||
|
||||
MachineMemOperand *GOTMMO = MF.getMachineMemOperand(
|
||||
MachinePointerInfo::getGOT(MF),
|
||||
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
|
||||
MachineMemOperand::MOInvariant,
|
||||
8 /*Size*/, 8 /*Align*/);
|
||||
|
||||
buildPCRelGlobalAddress(GOTAddr, PtrTy, B, GV, 0, SIInstrInfo::MO_GOTPCREL32);
|
||||
|
||||
if (Ty.getSizeInBits() == 32) {
|
||||
// Truncate if this is a 32-bit constant address.
|
||||
auto Load = B.buildLoad(PtrTy, GOTAddr, *GOTMMO);
|
||||
B.buildExtract(DstReg, Load, 0);
|
||||
} else
|
||||
B.buildLoad(DstReg, GOTAddr, *GOTMMO);
|
||||
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
|
||||
#include "AMDGPUArgumentUsageInfo.h"
|
||||
#include "SIInstrInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
|
@ -58,6 +59,10 @@ public:
|
|||
bool legalizeSinCos(MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
MachineIRBuilder &B) const;
|
||||
|
||||
/// Build a pc-relative address computation for \p GV plus \p Offset and
/// place the result, of type \p PtrTy, in \p DstReg. \p GAFlags is the
/// target flag applied to the symbol operand and defaults to
/// SIInstrInfo::MO_NONE.
bool buildPCRelGlobalAddress(
|
||||
Register DstReg, LLT PtrTy, MachineIRBuilder &B, const GlobalValue *GV,
|
||||
unsigned Offset, unsigned GAFlags = SIInstrInfo::MO_NONE) const;
|
||||
|
||||
bool legalizeGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
MachineIRBuilder &B) const;
|
||||
bool legalizeLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
|
|
|
@ -186,6 +186,7 @@ private:
|
|||
|
||||
unsigned isCFIntrinsic(const SDNode *Intr) const;
|
||||
|
||||
public:
|
||||
/// \returns True if fixup needs to be emitted for given global value \p GV,
|
||||
/// false otherwise.
|
||||
bool shouldEmitFixup(const GlobalValue *GV) const;
|
||||
|
@ -198,6 +199,7 @@ private:
|
|||
/// global value \p GV, false otherwise.
|
||||
bool shouldEmitPCReloc(const GlobalValue *GV) const;
|
||||
|
||||
private:
|
||||
// Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
|
||||
// three offsets (voffset, soffset and instoffset) into the SDValue[3] array
|
||||
// pointed to by Offsets.
|
||||
|
|
|
@ -0,0 +1,156 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=HSA %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=PAL %s
|
||||
|
||||
@external_constant = external addrspace(4) constant i32, align 4
|
||||
@external_constant32 = external addrspace(6) constant i32, align 4
|
||||
@external_global = external addrspace(1) global i32, align 4
|
||||
|
||||
@internal_constant = internal addrspace(4) constant i32 9, align 4
|
||||
@internal_constant32 = internal addrspace(6) constant i32 9, align 4
|
||||
@internal_global = internal addrspace(1) global i32 9, align 4
|
||||
|
||||
|
||||
define i32 addrspace(4)* @external_constant_got() {
|
||||
; HSA-LABEL: name: external_constant_got
|
||||
; HSA: bb.1 (%ir-block.0):
|
||||
; HSA: liveins: $sgpr30_sgpr31
|
||||
; HSA: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; HSA: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant + 4, target-flags(amdgpu-gotprel32-hi) @external_constant + 4, implicit-def $scc
|
||||
; HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4)
|
||||
; HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p4)
|
||||
; HSA: $vgpr0 = COPY [[UV]](s32)
|
||||
; HSA: $vgpr1 = COPY [[UV1]](s32)
|
||||
; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||
; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
|
||||
; PAL-LABEL: name: external_constant_got
|
||||
; PAL: bb.1 (%ir-block.0):
|
||||
; PAL: liveins: $sgpr30_sgpr31
|
||||
; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET @external_constant + 4, 0, implicit-def $scc
|
||||
; PAL: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4)
|
||||
; PAL: $vgpr0 = COPY [[UV]](s32)
|
||||
; PAL: $vgpr1 = COPY [[UV1]](s32)
|
||||
; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||
; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
|
||||
ret i32 addrspace(4)* @external_constant
|
||||
}
|
||||
|
||||
define i32 addrspace(1)* @external_global_got() {
|
||||
; HSA-LABEL: name: external_global_got
|
||||
; HSA: bb.1 (%ir-block.0):
|
||||
; HSA: liveins: $sgpr30_sgpr31
|
||||
; HSA: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; HSA: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 4, implicit-def $scc
|
||||
; HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4)
|
||||
; HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1)
|
||||
; HSA: $vgpr0 = COPY [[UV]](s32)
|
||||
; HSA: $vgpr1 = COPY [[UV1]](s32)
|
||||
; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||
; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
|
||||
; PAL-LABEL: name: external_global_got
|
||||
; PAL: bb.1 (%ir-block.0):
|
||||
; PAL: liveins: $sgpr30_sgpr31
|
||||
; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 4, implicit-def $scc
|
||||
; PAL: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4)
|
||||
; PAL: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1)
|
||||
; PAL: $vgpr0 = COPY [[UV]](s32)
|
||||
; PAL: $vgpr1 = COPY [[UV1]](s32)
|
||||
; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||
; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
|
||||
ret i32 addrspace(1)* @external_global
|
||||
}
|
||||
|
||||
define i32 addrspace(4)* @internal_constant_pcrel() {
|
||||
; HSA-LABEL: name: internal_constant_pcrel
|
||||
; HSA: bb.1 (%ir-block.0):
|
||||
; HSA: liveins: $sgpr30_sgpr31
|
||||
; HSA: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; HSA: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant + 4, target-flags(amdgpu-rel32-hi) @internal_constant + 4, implicit-def $scc
|
||||
; HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4)
|
||||
; HSA: $vgpr0 = COPY [[UV]](s32)
|
||||
; HSA: $vgpr1 = COPY [[UV1]](s32)
|
||||
; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||
; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
|
||||
; PAL-LABEL: name: internal_constant_pcrel
|
||||
; PAL: bb.1 (%ir-block.0):
|
||||
; PAL: liveins: $sgpr30_sgpr31
|
||||
; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET @internal_constant + 4, 0, implicit-def $scc
|
||||
; PAL: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4)
|
||||
; PAL: $vgpr0 = COPY [[UV]](s32)
|
||||
; PAL: $vgpr1 = COPY [[UV1]](s32)
|
||||
; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||
; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
|
||||
ret i32 addrspace(4)* @internal_constant
|
||||
}
|
||||
|
||||
define i32 addrspace(1)* @internal_global_pcrel() {
|
||||
; HSA-LABEL: name: internal_global_pcrel
|
||||
; HSA: bb.1 (%ir-block.0):
|
||||
; HSA: liveins: $sgpr30_sgpr31
|
||||
; HSA: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; HSA: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 4, implicit-def $scc
|
||||
; HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p1)
|
||||
; HSA: $vgpr0 = COPY [[UV]](s32)
|
||||
; HSA: $vgpr1 = COPY [[UV1]](s32)
|
||||
; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||
; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
|
||||
; PAL-LABEL: name: internal_global_pcrel
|
||||
; PAL: bb.1 (%ir-block.0):
|
||||
; PAL: liveins: $sgpr30_sgpr31
|
||||
; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 4, implicit-def $scc
|
||||
; PAL: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p1)
|
||||
; PAL: $vgpr0 = COPY [[UV]](s32)
|
||||
; PAL: $vgpr1 = COPY [[UV1]](s32)
|
||||
; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||
; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
|
||||
ret i32 addrspace(1)* @internal_global
|
||||
}
|
||||
|
||||
define i32 addrspace(6)* @external_constant32_got() {
|
||||
; HSA-LABEL: name: external_constant32_got
|
||||
; HSA: bb.1 (%ir-block.0):
|
||||
; HSA: liveins: $sgpr30_sgpr31
|
||||
; HSA: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; HSA: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant32 + 4, target-flags(amdgpu-gotprel32-hi) @external_constant32 + 4, implicit-def $scc
|
||||
; HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4)
|
||||
; HSA: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[LOAD]](p4), 0
|
||||
; HSA: $vgpr0 = COPY [[EXTRACT]](p6)
|
||||
; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||
; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
|
||||
; PAL-LABEL: name: external_constant32_got
|
||||
; PAL: bb.1 (%ir-block.0):
|
||||
; PAL: liveins: $sgpr30_sgpr31
|
||||
; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET @external_constant32 + 4, 0, implicit-def $scc
|
||||
; PAL: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0
|
||||
; PAL: $vgpr0 = COPY [[EXTRACT]](p6)
|
||||
; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||
; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
|
||||
ret i32 addrspace(6)* @external_constant32
|
||||
}
|
||||
|
||||
define i32 addrspace(6)* @internal_constant32_pcrel() {
|
||||
; HSA-LABEL: name: internal_constant32_pcrel
|
||||
; HSA: bb.1 (%ir-block.0):
|
||||
; HSA: liveins: $sgpr30_sgpr31
|
||||
; HSA: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; HSA: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant32 + 4, target-flags(amdgpu-rel32-hi) @internal_constant32 + 4, implicit-def $scc
|
||||
; HSA: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0
|
||||
; HSA: $vgpr0 = COPY [[EXTRACT]](p6)
|
||||
; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||
; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
|
||||
; PAL-LABEL: name: internal_constant32_pcrel
|
||||
; PAL: bb.1 (%ir-block.0):
|
||||
; PAL: liveins: $sgpr30_sgpr31
|
||||
; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET @internal_constant32 + 4, 0, implicit-def $scc
|
||||
; PAL: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0
|
||||
; PAL: $vgpr0 = COPY [[EXTRACT]](p6)
|
||||
; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
|
||||
; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
|
||||
ret i32 addrspace(6)* @internal_constant32
|
||||
}
|
Loading…
Reference in New Issue