AMDGPU/GlobalISel: Handle more input argument intrinsics
llvm-svn: 364836
Commit: bae3636f96 (parent: 9e8e8c60fa)
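
As a rough illustration of what this change enables (the snippet below is not part of the commit, and the function and value names are made up; it merges the shape of the new dispatch-ptr and dispatch-id tests added at the end of the diff), a kernel that queries these preloaded user-SGPR inputs can now be compiled through the GlobalISel path on an amdhsa target, e.g. with llc -global-isel -mtriple=amdgcn--amdhsa:

; Illustrative only -- not part of this commit.
declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0

define amdgpu_kernel void @user_sgpr_inputs(i32 addrspace(1)* %out, i64 addrspace(1)* %out.id) {
  ; Read the first word of the dispatch packet through the preloaded pointer.
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %header.ptr = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %header.word = load i32, i32 addrspace(4)* %header.ptr
  ; The 64-bit dispatch id is preloaded into a pair of SGPRs.
  %id = call i64 @llvm.amdgcn.dispatch.id()
  store volatile i32 %header.word, i32 addrspace(1)* %out
  store volatile i64 %id, i64 addrspace(1)* %out.id
  ret void
}

attributes #0 = { nounwind readnone }

The queue pointer and implicit buffer pointer intrinsics handled below follow the same pattern; see the new tests at the end of the diff.
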
@@ -198,6 +198,58 @@ static void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
   }
 }
 
+// Allocate special inputs passed in user SGPRs.
+static void allocateHSAUserSGPRs(CCState &CCInfo,
+                                 MachineIRBuilder &MIRBuilder,
+                                 MachineFunction &MF,
+                                 const SIRegisterInfo &TRI,
+                                 SIMachineFunctionInfo &Info) {
+  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
+  if (Info.hasPrivateSegmentBuffer()) {
+    unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
+    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
+    CCInfo.AllocateReg(PrivateSegmentBufferReg);
+  }
+
+  if (Info.hasDispatchPtr()) {
+    unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
+    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
+    CCInfo.AllocateReg(DispatchPtrReg);
+  }
+
+  if (Info.hasQueuePtr()) {
+    unsigned QueuePtrReg = Info.addQueuePtr(TRI);
+    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
+    CCInfo.AllocateReg(QueuePtrReg);
+  }
+
+  if (Info.hasKernargSegmentPtr()) {
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
+    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
+    Register VReg = MRI.createGenericVirtualRegister(P4);
+    MRI.addLiveIn(InputPtrReg, VReg);
+    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
+    MIRBuilder.buildCopy(VReg, InputPtrReg);
+    CCInfo.AllocateReg(InputPtrReg);
+  }
+
+  if (Info.hasDispatchID()) {
+    unsigned DispatchIDReg = Info.addDispatchID(TRI);
+    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
+    CCInfo.AllocateReg(DispatchIDReg);
+  }
+
+  if (Info.hasFlatScratchInit()) {
+    unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
+    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
+    CCInfo.AllocateReg(FlatScratchInitReg);
+  }
+
+  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
+  // these from the dispatch pointer.
+}
+
 static void allocateSystemSGPRs(CCState &CCInfo,
                                 MachineFunction &MF,
                                 SIMachineFunctionInfo &Info,
@@ -272,51 +324,12 @@ bool AMDGPUCallLowering::lowerFormalArguments(
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
 
-  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
-  if (Info->hasPrivateSegmentBuffer()) {
-    Register PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
-    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
-    CCInfo.AllocateReg(PrivateSegmentBufferReg);
-  }
-
-  if (Info->hasDispatchPtr()) {
-    Register DispatchPtrReg = Info->addDispatchPtr(*TRI);
-    // FIXME: Need to add reg as live-in
-    CCInfo.AllocateReg(DispatchPtrReg);
-  }
-
-  if (Info->hasQueuePtr()) {
-    Register QueuePtrReg = Info->addQueuePtr(*TRI);
-    // FIXME: Need to add reg as live-in
-    CCInfo.AllocateReg(QueuePtrReg);
-  }
-
-  if (Info->hasKernargSegmentPtr()) {
-    Register InputPtrReg = Info->addKernargSegmentPtr(*TRI);
-    const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
-    Register VReg = MRI.createGenericVirtualRegister(P2);
-    MRI.addLiveIn(InputPtrReg, VReg);
-    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
-    MIRBuilder.buildCopy(VReg, InputPtrReg);
-    CCInfo.AllocateReg(InputPtrReg);
-  }
-
-  if (Info->hasDispatchID()) {
-    unsigned DispatchIDReg = Info->addDispatchID(*TRI);
-    // FIXME: Need to add reg as live-in
-    CCInfo.AllocateReg(DispatchIDReg);
-  }
-
-  if (Info->hasFlatScratchInit()) {
-    unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
-    // FIXME: Need to add reg as live-in
-    CCInfo.AllocateReg(FlatScratchInitReg);
-  }
-
   // The infrastructure for normal calling convention lowering is essentially
   // useless for kernels. We want to avoid any kind of legalization or argument
   // splitting.
   if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
+    allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info);
+
     unsigned i = 0;
     const unsigned KernArgBaseAlign = 16;
     const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
@@ -352,6 +365,12 @@ bool AMDGPUCallLowering::lowerFormalArguments(
     return true;
   }
 
+  if (Info->hasImplicitBufferPtr()) {
+    unsigned ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
+    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
+    CCInfo.AllocateReg(ImplicitBufferPtrReg);
+  }
+
   unsigned NumArgs = F.arg_size();
   Function::const_arg_iterator CurOrigArg = F.arg_begin();
   const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
@@ -1237,6 +1237,18 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
   case Intrinsic::amdgcn_workgroup_id_z:
     return legalizePreloadedArgIntrin(MI, MRI, B,
                                       AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+  case Intrinsic::amdgcn_dispatch_ptr:
+    return legalizePreloadedArgIntrin(MI, MRI, B,
+                                      AMDGPUFunctionArgInfo::DISPATCH_PTR);
+  case Intrinsic::amdgcn_queue_ptr:
+    return legalizePreloadedArgIntrin(MI, MRI, B,
+                                      AMDGPUFunctionArgInfo::QUEUE_PTR);
+  case Intrinsic::amdgcn_implicit_buffer_ptr:
+    return legalizePreloadedArgIntrin(
+      MI, MRI, B, AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR);
+  case Intrinsic::amdgcn_dispatch_id:
+    return legalizePreloadedArgIntrin(MI, MRI, B,
+                                      AMDGPUFunctionArgInfo::DISPATCH_ID);
   default:
     return true;
   }
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -stop-after=irtranslator -global-isel %s -o - | FileCheck %s
 
 ; Check that we correctly skip over disabled inputs
-; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr0
+; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr2
 ; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]](s32), [[S0]](s32), [[S0]](s32), [[V0]](s32)
 define amdgpu_ps void @ps0(float inreg %arg0, float %psinput0, float %psinput1) #1 {
@@ -2,7 +2,7 @@
 
 ; CHECK-LABEL: name: test_f32_inreg
-; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr0
+; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr2
 ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]]
 define amdgpu_vs void @test_f32_inreg(float inreg %arg0) {
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
@@ -18,7 +18,7 @@ define amdgpu_vs void @test_f32(float %arg0) {
 }
 
 ; CHECK-LABEL: name: test_ptr2_byval
-; CHECK: [[S01:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+; CHECK: [[S01:%[0-9]+]]:_(p4) = COPY $sgpr2_sgpr3
 ; CHECK: G_LOAD [[S01]]
 define amdgpu_vs void @test_ptr2_byval(i32 addrspace(4)* byval %arg0) {
   %tmp0 = load volatile i32, i32 addrspace(4)* %arg0
@@ -26,7 +26,7 @@ define amdgpu_vs void @test_ptr2_byval(i32 addrspace(4)* byval %arg0) {
 }
 
 ; CHECK-LABEL: name: test_ptr2_inreg
-; CHECK: [[S01:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+; CHECK: [[S01:%[0-9]+]]:_(p4) = COPY $sgpr2_sgpr3
 ; CHECK: G_LOAD [[S01]]
 define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) {
   %tmp0 = load volatile i32, i32 addrspace(4)* %arg0
@@ -34,8 +34,8 @@ define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) {
 }
 
 ; CHECK-LABEL: name: test_sgpr_alignment0
-; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr0
-; CHECK: [[S23:%[0-9]+]]:_(p4) = COPY $sgpr2_sgpr3
+; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr2
+; CHECK: [[S23:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
 ; CHECK: G_LOAD [[S23]]
 ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]]
 define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* inreg %arg1) {
@@ -45,8 +45,8 @@ define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)*
 }
 
 ; CHECK-LABEL: name: test_order
-; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr0
-; CHECK: [[S1:%[0-9]+]]:_(s32) = COPY $sgpr1
+; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr2
+; CHECK: [[S1:%[0-9]+]]:_(s32) = COPY $sgpr3
 ; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0
 ; CHECK: [[V1:%[0-9]+]]:_(s32) = COPY $vgpr1
 ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[V0]](s32), [[S0]](s32), [[V1]](s32), [[S1]](s32)
@@ -56,8 +56,8 @@ define amdgpu_vs void @test_order(float inreg %arg0, float inreg %arg1, float %a
 }
 
 ; CHECK-LABEL: name: ret_struct
-; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr0
-; CHECK: [[S1:%[0-9]+]]:_(s32) = COPY $sgpr1
+; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr2
+; CHECK: [[S1:%[0-9]+]]:_(s32) = COPY $sgpr3
 ; CHECK: $sgpr0 = COPY [[S0]]
 ; CHECK: $sgpr1 = COPY [[S1]]
 ; CHECK: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
@@ -0,0 +1,19 @@
+; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+declare i64 @llvm.amdgcn.dispatch.id() #1
+
+; GCN-LABEL: {{^}}dispatch_id:
+; GCN: .amd_kernel_code_t
+; GCN: enable_sgpr_dispatch_id = 1
+
+; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s6
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s7
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define amdgpu_kernel void @dispatch_id(i64 addrspace(1)* %out) #0 {
+  %tmp0 = call i64 @llvm.amdgcn.dispatch.id()
+  store i64 %tmp0, i64 addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
@@ -0,0 +1,18 @@
+; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-code-object-v3 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; FIXME: Error on non-HSA target
+
+; GCN-LABEL: {{^}}test:
+; GCN: enable_sgpr_dispatch_ptr = 1
+; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
+define amdgpu_kernel void @test(i32 addrspace(1)* %out) {
+  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
+  %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
+  %value = load i32, i32 addrspace(4)* %header_ptr
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+}
+
+declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
+
+attributes #0 = { readnone }
@@ -0,0 +1,17 @@
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; FIXME: Dropped parts from original test
+
+; GCN-LABEL: {{^}}test_ps:
+; GCN: s_load_dword s{{[0-9]+}}, s[0:1], 0x0
+define amdgpu_ps i32 @test_ps() #1 {
+  %implicit_buffer_ptr = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr()
+  %buffer_ptr = bitcast i8 addrspace(4)* %implicit_buffer_ptr to i32 addrspace(4)*
+  %value = load volatile i32, i32 addrspace(4)* %buffer_ptr
+  ret i32 %value
+}
+
+declare i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr() #0
+
+attributes #0 = { nounwind readnone speculatable }
+attributes #1 = { nounwind }
@@ -0,0 +1,18 @@
+; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-code-object-v3 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; FIXME: Error on non-hsa target
+
+; GCN-LABEL: {{^}}test:
+; GCN: enable_sgpr_queue_ptr = 1
+; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
+define amdgpu_kernel void @test(i32 addrspace(1)* %out) {
+  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
+  %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
+  %value = load i32, i32 addrspace(4)* %header_ptr
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+}
+
+declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
+
+attributes #0 = { nounwind readnone }
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
 ; FIXME: Requires stack object to not assert
 ; GCN-LABEL: {{^}}test_ps: