forked from OSchip/llvm-project
AMDGPU: Add llvm.amdgcn.dispatch.ptr intrinsic
Summary: This returns a pointer to the dispatch packet, which can be used to load information about the kernel dispatch. Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D14898 llvm-svn: 254116
This commit is contained in:
parent
1c80b9d315
commit
48f29f21ee
|
@ -127,4 +127,8 @@ def int_amdgcn_s_dcache_wb_vol :
|
|||
GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
|
||||
Intrinsic<[], [], []>;
|
||||
|
||||
def int_amdgcn_dispatch_ptr :
|
||||
GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
|
||||
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
|
||||
|
||||
}
|
||||
|
|
|
@ -105,7 +105,8 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
|
|||
|
||||
{ "llvm.r600.read.global.size.x", "amdgpu-dispatch-ptr" },
|
||||
{ "llvm.r600.read.global.size.y", "amdgpu-dispatch-ptr" },
|
||||
{ "llvm.r600.read.global.size.z", "amdgpu-dispatch-ptr" }
|
||||
{ "llvm.r600.read.global.size.z", "amdgpu-dispatch-ptr" },
|
||||
{ "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" }
|
||||
};
|
||||
|
||||
// TODO: Intrinsics that require queue ptr.
|
||||
|
|
|
@ -528,6 +528,9 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
|
|||
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
|
||||
AMD_CODE_PROPERTY_IS_PTR64;
|
||||
|
||||
if (MFI->hasDispatchPtr())
|
||||
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
|
||||
|
||||
header.kernarg_segment_byte_size = MFI->ABIArgOffset;
|
||||
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
|
||||
header.workitem_vgpr_count = KernelInfo.NumVGPR;
|
||||
|
|
|
@ -646,6 +646,18 @@ SDValue SITargetLowering::LowerFormalArguments(
|
|||
CCInfo.AllocateReg(ScratchPtrRegHi);
|
||||
MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass);
|
||||
MF.addLiveIn(ScratchPtrReg, &AMDGPU::SReg_64RegClass);
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
if (Subtarget->isAmdHsaOS() && MFI->hasDispatchPtr()) {
|
||||
unsigned DispatchPtrReg =
|
||||
TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR);
|
||||
unsigned DispatchPtrRegLo =
|
||||
TRI->getPhysRegSubReg(DispatchPtrReg, &AMDGPU::SReg_32RegClass, 0);
|
||||
unsigned DispatchPtrRegHi =
|
||||
TRI->getPhysRegSubReg(DispatchPtrReg, &AMDGPU::SReg_32RegClass, 1);
|
||||
CCInfo.AllocateReg(DispatchPtrRegLo);
|
||||
CCInfo.AllocateReg(DispatchPtrRegHi);
|
||||
MF.addLiveIn(DispatchPtrReg, &AMDGPU::SReg_64RegClass);
|
||||
}
|
||||
}
|
||||
|
||||
if (Info->getShaderType() == ShaderType::COMPUTE) {
|
||||
|
@ -1053,6 +1065,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
// TODO: Should this propagate fast-math-flags?
|
||||
|
||||
switch (IntrinsicID) {
|
||||
case Intrinsic::amdgcn_dispatch_ptr:
|
||||
return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass,
|
||||
TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR), VT);
|
||||
|
||||
case Intrinsic::r600_read_ngroups_x:
|
||||
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
|
||||
SI::KernelInputOffsets::NGROUPS_X, false);
|
||||
|
|
|
@ -510,6 +510,7 @@ bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
|
|||
unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
|
||||
enum PreloadedValue Value) const {
|
||||
|
||||
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
switch (Value) {
|
||||
case SIRegisterInfo::TGID_X:
|
||||
|
@ -525,6 +526,11 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
|
|||
case SIRegisterInfo::SCRATCH_PTR:
|
||||
return AMDGPU::SGPR2_SGPR3;
|
||||
case SIRegisterInfo::INPUT_PTR:
|
||||
if (ST.isAmdHsaOS())
|
||||
return MFI->hasDispatchPtr() ? AMDGPU::SGPR2_SGPR3 : AMDGPU::SGPR0_SGPR1;
|
||||
return AMDGPU::SGPR0_SGPR1;
|
||||
case SIRegisterInfo::DISPATCH_PTR:
|
||||
assert(MFI->hasDispatchPtr());
|
||||
return AMDGPU::SGPR0_SGPR1;
|
||||
case SIRegisterInfo::TIDIG_X:
|
||||
return AMDGPU::VGPR0;
|
||||
|
|
|
@ -99,6 +99,7 @@ public:
|
|||
enum PreloadedValue {
|
||||
// SGPRS:
|
||||
SCRATCH_PTR = 0,
|
||||
DISPATCH_PTR = 1,
|
||||
INPUT_PTR = 3,
|
||||
TGID_X = 10,
|
||||
TGID_Y = 11,
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}test:
|
||||
; GCN: enable_sgpr_dispatch_ptr = 1
|
||||
; GCN: s_load_dword s{{[0-9]+}}, s[0:1], 0x0
|
||||
define void @test(i32 addrspace(1)* %out) {
|
||||
%dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
%header_ptr = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
|
||||
%value = load i32, i32 addrspace(2)* %header_ptr
|
||||
store i32 %value, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
declare noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
|
||||
|
||||
attributes #0 = { readnone }
|
Loading…
Reference in New Issue