AMDGPU/GlobalISel: Fix skipping unused kernel arguments

The tests in a5b9ad7e9a actually failed
the verifier, which for some reason is not the default. Also add tests
for 0-sized function arguments, which do not add entries to the
expected register lists.
This commit is contained in:
Matt Arsenault 2020-07-07 13:57:09 -04:00
parent b172cd7812
commit 42bb481442
3 changed files with 82 additions and 10 deletions

View File

@ -523,8 +523,10 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
if (Arg.use_empty())
if (Arg.use_empty()) {
++i;
continue;
}
ArrayRef<Register> OrigArgRegs = VRegs[i];
Register ArgReg =

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -global-isel %s -o - | FileCheck -check-prefix=HSA-VI %s
; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -global-isel %s -o - | FileCheck -check-prefix=LEGACY-MESA-VI %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSA-VI %s
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=LEGACY-MESA-VI %s
define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
; HSA-VI-LABEL: name: i8_arg
@ -1070,17 +1070,55 @@ define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwi
ret void
}
define amdgpu_kernel void @empty_struct_arg({} %in) nounwind {
; 0-sized arguments do not add a slot to the argument register set, so
; waste an index in the virtual register array.
define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind {
; HSA-VI-LABEL: name: empty_struct_arg
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4)
; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; HSA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: empty_struct_arg
; LEGACY-MESA-VI: bb.1 (%ir-block.0):
; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, addrspace 4)
; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
store i32 %arg1, i32 addrspace(1)* undef
ret void
}
define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind {
; HSA-VI-LABEL: name: empty_array_arg
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4)
; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; HSA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: empty_array_arg
; LEGACY-MESA-VI: bb.1 (%ir-block.0):
; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, addrspace 4)
; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
store i32 %arg1, i32 addrspace(1)* undef
ret void
}
@ -1171,13 +1209,15 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0,
; HSA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 13
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4)
; HSA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s96), 0
; HSA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD1]](s96), 32
; HSA-VI: [[C2:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C2]](p1)
; HSA-VI: G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE %3:_(s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE %4:_(s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[EXTRACT2]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: packed_struct_argument_alignment
; LEGACY-MESA-VI: bb.1 (%ir-block.1):
@ -1190,13 +1230,15 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0,
; LEGACY-MESA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 49
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4)
; LEGACY-MESA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s96), 0
; LEGACY-MESA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD1]](s96), 32
; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C2]](p1)
; LEGACY-MESA-VI: G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE %3:_(s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE %4:_(s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[EXTRACT2]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
%val0 = extractvalue <{i32, i64}> %arg0, 0
%val1 = extractvalue <{i32, i64}> %arg0, 1

View File

@ -2,6 +2,34 @@
; RUN: llc -march=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs -o - %s | FileCheck %s
; FIXME: pre-VI should have same ABI without legal i16 operations.
define void @void_func_empty_arg({} %arg0, i32 %arg1) #0 {
; CHECK-LABEL: name: void_func_empty_arg
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]]
store i32 %arg1, i32 addrspace(1)* undef
ret void
}
define void @void_func_empty_array([0 x i8] %arg0, i32 %arg1) #0 {
; CHECK-LABEL: name: void_func_empty_array
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]]
store i32 %arg1, i32 addrspace(1)* undef
ret void
}
define void @void_func_i1(i1 %arg0) #0 {
; CHECK-LABEL: name: void_func_i1
; CHECK: bb.1 (%ir-block.0):