AMDGPU: Add llvm.amdgcn.dispatch.ptr intrinsic

Summary:
This returns a pointer to the dispatch packet, which can be used to load
information about the kernel dispach.

Reviewers: arsenm

Subscribers: arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D14898

llvm-svn: 254116
This commit is contained in:
Tom Stellard 2015-11-26 00:43:29 +00:00
parent 1c80b9d315
commit 48f29f21ee
7 changed files with 48 additions and 1 deletions

View File

@ -127,4 +127,8 @@ def int_amdgcn_s_dcache_wb_vol :
GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
Intrinsic<[], [], []>;
def int_amdgcn_dispatch_ptr :
GCCBuiltin<"__builtin_amdgcn_disptch_ptr">,
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
}

View File

@ -105,7 +105,8 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
{ "llvm.r600.read.global.size.x", "amdgpu-dispatch-ptr" },
{ "llvm.r600.read.global.size.y", "amdgpu-dispatch-ptr" },
{ "llvm.r600.read.global.size.z", "amdgpu-dispatch-ptr" }
{ "llvm.r600.read.global.size.z", "amdgpu-dispatch-ptr" },
{ "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" }
};
// TODO: Intrinsics that require queue ptr.

View File

@ -528,6 +528,9 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
AMD_CODE_PROPERTY_IS_PTR64;
if (MFI->hasDispatchPtr())
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
header.kernarg_segment_byte_size = MFI->ABIArgOffset;
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;

View File

@ -646,6 +646,18 @@ SDValue SITargetLowering::LowerFormalArguments(
CCInfo.AllocateReg(ScratchPtrRegHi);
MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass);
MF.addLiveIn(ScratchPtrReg, &AMDGPU::SReg_64RegClass);
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
if (Subtarget->isAmdHsaOS() && MFI->hasDispatchPtr()) {
unsigned DispatchPtrReg =
TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR);
unsigned DispatchPtrRegLo =
TRI->getPhysRegSubReg(DispatchPtrReg, &AMDGPU::SReg_32RegClass, 0);
unsigned DispatchPtrRegHi =
TRI->getPhysRegSubReg(DispatchPtrReg, &AMDGPU::SReg_32RegClass, 1);
CCInfo.AllocateReg(DispatchPtrRegLo);
CCInfo.AllocateReg(DispatchPtrRegHi);
MF.addLiveIn(DispatchPtrReg, &AMDGPU::SReg_64RegClass);
}
}
if (Info->getShaderType() == ShaderType::COMPUTE) {
@ -1053,6 +1065,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// TODO: Should this propagate fast-math-flags?
switch (IntrinsicID) {
case Intrinsic::amdgcn_dispatch_ptr:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR), VT);
case Intrinsic::r600_read_ngroups_x:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::NGROUPS_X, false);

View File

@ -510,6 +510,7 @@ bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
enum PreloadedValue Value) const {
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
switch (Value) {
case SIRegisterInfo::TGID_X:
@ -525,6 +526,11 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
case SIRegisterInfo::SCRATCH_PTR:
return AMDGPU::SGPR2_SGPR3;
case SIRegisterInfo::INPUT_PTR:
if (ST.isAmdHsaOS())
return MFI->hasDispatchPtr() ? AMDGPU::SGPR2_SGPR3 : AMDGPU::SGPR0_SGPR1;
return AMDGPU::SGPR0_SGPR1;
case SIRegisterInfo::DISPATCH_PTR:
assert(MFI->hasDispatchPtr());
return AMDGPU::SGPR0_SGPR1;
case SIRegisterInfo::TIDIG_X:
return AMDGPU::VGPR0;

View File

@ -99,6 +99,7 @@ public:
enum PreloadedValue {
// SGPRS:
SCRATCH_PTR = 0,
DISPATCH_PTR = 1,
INPUT_PTR = 3,
TGID_X = 10,
TGID_Y = 11,

View File

@ -0,0 +1,16 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}test:
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: s_load_dword s{{[0-9]+}}, s[0:1], 0x0
define void @test(i32 addrspace(1)* %out) {
%dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
%header_ptr = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
%value = load i32, i32 addrspace(2)* %header_ptr
store i32 %value, i32 addrspace(1)* %out
ret void
}
declare noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
attributes #0 = { readnone }