forked from OSchip/llvm-project
parent 621d3675cb
commit 48ab526f12
@@ -312,6 +312,10 @@ def int_amdgcn_dispatch_ptr :
   GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
   Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
 
+def int_amdgcn_queue_ptr :
+  GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
+  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+
 // __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
 def int_amdgcn_interp_p1 :
   GCCBuiltin<"__builtin_amdgcn_interp_p1">,
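For reference, a minimal IR sketch of how the new intrinsic is consumed, mirroring the tests added later in this commit (the function name here is illustrative):

declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0

; Read the first word of the HSA queue descriptor, which lives in the
; constant address space (addrspace 2).
define void @read_queue_word(i32 addrspace(1)* %out) #1 {
  %q = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr()
  %p = bitcast i8 addrspace(2)* %q to i32 addrspace(2)*
  %v = load i32, i32 addrspace(2)* %p
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }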
@@ -104,7 +104,8 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
   };
 
   static const StringRef HSAIntrinsicToAttr[][2] = {
-    { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" }
+    { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
+    { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" }
   };
 
   // TODO: We should not add the attributes if the known compile time workgroup
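AMDGPUAnnotateKernelFeatures uses this table to attach the attribute to any function that calls the matching intrinsic. A sketch of the effect (the attribute-group number is illustrative; the updated test below checks #11):

declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0

define void @use_queue_ptr() #1 {
  %q = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr()
  ret void
}

attributes #0 = { nounwind readnone }
; Before the pass:  attributes #1 = { nounwind }
; After the pass:   attributes #1 = { nounwind "amdgpu-queue-ptr" }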
@@ -745,6 +745,12 @@ SDValue SITargetLowering::LowerFormalArguments(
     CCInfo.AllocateReg(DispatchPtrReg);
   }
 
+  if (Info->hasQueuePtr()) {
+    unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
+    MF.addLiveIn(QueuePtrReg, &AMDGPU::SReg_64RegClass);
+    CCInfo.AllocateReg(QueuePtrReg);
+  }
+
   if (Info->hasKernargSegmentPtr()) {
     unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
     MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass);
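Like the dispatch pointer, the queue pointer is a 64-bit value preloaded into a scalar register pair (SReg_64), reserved in the calling convention between the dispatch pointer and the kernarg segment pointer, matching the allocation order above.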
@@ -1450,6 +1456,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
 
   switch (IntrinsicID) {
   case Intrinsic::amdgcn_dispatch_ptr:
+  case Intrinsic::amdgcn_queue_ptr: {
     if (!Subtarget->isAmdHsaOS()) {
       DiagnosticInfoUnsupported BadIntrin(
         *MF.getFunction(), "unsupported hsa intrinsic without hsa target",
@@ -1458,8 +1465,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       return DAG.getUNDEF(VT);
     }
 
+    auto Reg = IntrinsicID == Intrinsic::amdgcn_dispatch_ptr ?
+      SIRegisterInfo::DISPATCH_PTR : SIRegisterInfo::QUEUE_PTR;
     return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass,
-      TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR), VT);
+      TRI->getPreloadedValue(MF, Reg), VT);
+  }
   case Intrinsic::amdgcn_rcp:
     return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
   case Intrinsic::amdgcn_rsq:
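The two HSA intrinsics now share one lowering path: on a non-HSA target both emit the "unsupported hsa intrinsic without hsa target" diagnostic and fold to undef; otherwise the lowering selects the preloaded DISPATCH_PTR or QUEUE_PTR SGPR pair based on the intrinsic ID.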
@@ -116,6 +116,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
 
   if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
     DispatchPtr = true;
+
+  if (F->hasFnAttribute("amdgpu-queue-ptr"))
+    QueuePtr = true;
 }
 
 // We don't need to worry about accessing spills with flat instructions.
@@ -917,7 +917,8 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
     assert(MFI->hasDispatchPtr());
     return MFI->DispatchPtrUserSGPR;
   case SIRegisterInfo::QUEUE_PTR:
-    llvm_unreachable("not implemented");
+    assert(MFI->hasQueuePtr());
+    return MFI->QueuePtrUserSGPR;
   case SIRegisterInfo::WORKITEM_ID_X:
     assert(MFI->hasWorkItemIDX());
     return AMDGPU::VGPR0;
@@ -9,6 +9,7 @@ declare i32 @llvm.amdgcn.workitem.id.y() #0
 declare i32 @llvm.amdgcn.workitem.id.z() #0
 
 declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
+declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
 
 ; HSA: define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
 define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
@@ -154,6 +155,15 @@ define void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
   ret void
 }
 
+; HSA: define void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 {
+define void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
+  %dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr()
+  %bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
+  %val = load i32, i32 addrspace(2)* %bc
+  store i32 %val, i32 addrspace(1)* %ptr
+  ret void
+}
+
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
 
@@ -168,3 +178,4 @@ attributes #1 = { nounwind }
 ; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
 ; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
 ; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" }
+; HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" }
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: not llc -mtriple=amdgcn-unknown-unknown -mcpu=kaveri -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s
+
+; ERROR: in function test{{.*}}: unsupported hsa intrinsic without hsa target
+
+; GCN-LABEL: {{^}}test:
+; GCN: enable_sgpr_queue_ptr = 1
+; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
+define void @test(i32 addrspace(1)* %out) {
+  %queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
+  %header_ptr = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)*
+  %value = load i32, i32 addrspace(2)* %header_ptr
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+}
+
+declare noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
+
+attributes #0 = { nounwind readnone }
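The new test covers both sides of the change: with the amdhsa triple, the kernel header sets enable_sgpr_queue_ptr and the first queue word is loaded through the preloaded SGPR pair s[4:5]; with a non-HSA triple, llc fails with the diagnostic added above.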