AMDGPU: Fix assert with indirect call with known required inputs

The attributor can determine that some indirect calls do not require
special inputs. The special inputs will still be present in the ABI,
so we need to allocate the registers and pass undefs.
This commit is contained in:
Matt Arsenault 2021-09-13 21:52:07 -04:00
parent 2bd8493847
commit c305513cc2
2 changed files with 49 additions and 2 deletions

View File

@ -2813,11 +2813,14 @@ void SITargetLowering::passSpecialInputs(
if (IncomingArg) {
InputReg = loadInputValue(DAG, ArgRC, ArgVT, DL, *IncomingArg);
} else {
} else if (InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR) {
// The implicit arg ptr is special because it doesn't have a corresponding
// input for kernels, and is computed from the kernarg segment pointer.
assert(InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
InputReg = getImplicitArgPtr(DAG, DL);
} else {
// We may have proven the input wasn't needed, although the ABI is
// requiring it. We just need to allocate the register appropriately.
InputReg = DAG.getUNDEF(ArgVT);
}
if (OutgoingArg->isRegister()) {

View File

@ -0,0 +1,44 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
; This is an indirect call with a known set of callees, none of which
; need any special inputs. The ABI still requires the corresponding
; registers to be allocated.
; FIXME: This passes real values for the workitem ID, and 0s where undef would do.
; CHECK-LABEL: indirect_call_known_no_special_inputs:
; CHECK: s_mov_b64 s[8:9], 0
; CHECK: s_mov_b32 s12, s6
; CHECK: v_mov_b32_e32 v31, v0
; CHECK: s_swappc_b64
; CHECK: .amdhsa_kernarg_size 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
; Kernel whose only work is a single indirect call. The call target is a
; select between @wobble and @snork, so both possible callees are visible in
; the IR even though the call itself is indirect.
define amdgpu_kernel void @indirect_call_known_no_special_inputs() {
bb:
; Select on an undef condition; each candidate is defined as void () and is
; bitcast to void (i8*, i32, i8*) to match the call signature below.
%tmp = select i1 undef, void (i8*, i32, i8*)* bitcast (void ()* @wobble to void (i8*, i32, i8*)*), void (i8*, i32, i8*)* bitcast (void ()* @snork to void (i8*, i32, i8*)*)
; Every explicit argument is undef.
call void %tmp(i8* undef, i32 undef, i8* undef)
ret void
}
; One of the two candidate targets of the indirect call in
; @indirect_call_known_no_special_inputs. It takes no arguments and returns
; immediately.
define void @wobble() {
bb:
ret void
}
; The other candidate target of the indirect call in
; @indirect_call_known_no_special_inputs. It takes no arguments and returns
; immediately.
define void @snork() {
bb:
ret void
}