forked from OSchip/llvm-project
AMDGPU/GlobalISel: Fix skipping unused kernel arguments
The tests in a5b9ad7e9a
actually failed
the verifier, which for some reason is not the default. Also add tests
for 0-sized function arguments, which do not add entries to the
expected register lists.
This commit is contained in:
parent
b172cd7812
commit
42bb481442
|
@ -523,8 +523,10 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
|
|||
uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
|
||||
ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
|
||||
|
||||
if (Arg.use_empty())
|
||||
if (Arg.use_empty()) {
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
|
||||
ArrayRef<Register> OrigArgRegs = VRegs[i];
|
||||
Register ArgReg =
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -global-isel %s -o - | FileCheck -check-prefix=HSA-VI %s
|
||||
; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -global-isel %s -o - | FileCheck -check-prefix=LEGACY-MESA-VI %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSA-VI %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=LEGACY-MESA-VI %s
|
||||
|
||||
define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
|
||||
; HSA-VI-LABEL: name: i8_arg
|
||||
|
@ -1070,17 +1070,55 @@ define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwi
|
|||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @empty_struct_arg({} %in) nounwind {
|
||||
; 0-sized arguments do not add a slot to the argument register set, so
|
||||
; waste an index in the virtual register array.
|
||||
define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind {
|
||||
; HSA-VI-LABEL: name: empty_struct_arg
|
||||
; HSA-VI: bb.1 (%ir-block.0):
|
||||
; HSA-VI: liveins: $sgpr4_sgpr5
|
||||
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
|
||||
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
|
||||
; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4)
|
||||
; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; HSA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: empty_struct_arg
|
||||
; LEGACY-MESA-VI: bb.1 (%ir-block.0):
|
||||
; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1
|
||||
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
|
||||
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
|
||||
; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
store i32 %arg1, i32 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind {
|
||||
; HSA-VI-LABEL: name: empty_array_arg
|
||||
; HSA-VI: bb.1 (%ir-block.0):
|
||||
; HSA-VI: liveins: $sgpr4_sgpr5
|
||||
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
|
||||
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
|
||||
; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4)
|
||||
; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; HSA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: empty_array_arg
|
||||
; LEGACY-MESA-VI: bb.1 (%ir-block.0):
|
||||
; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1
|
||||
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
|
||||
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
|
||||
; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
store i32 %arg1, i32 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -1171,13 +1209,15 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0,
|
|||
; HSA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 13
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4)
|
||||
; HSA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s96), 0
|
||||
; HSA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD1]](s96), 32
|
||||
; HSA-VI: [[C2:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
|
||||
; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C2]](p1)
|
||||
; HSA-VI: G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE %3:_(s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE %4:_(s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[EXTRACT2]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: packed_struct_argument_alignment
|
||||
; LEGACY-MESA-VI: bb.1 (%ir-block.1):
|
||||
|
@ -1190,13 +1230,15 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0,
|
|||
; LEGACY-MESA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 49
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s96), 0
|
||||
; LEGACY-MESA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD1]](s96), 32
|
||||
; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
|
||||
; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C2]](p1)
|
||||
; LEGACY-MESA-VI: G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE %3:_(s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE %4:_(s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[EXTRACT2]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
%val0 = extractvalue <{i32, i64}> %arg0, 0
|
||||
%val1 = extractvalue <{i32, i64}> %arg0, 1
|
||||
|
|
|
@ -2,6 +2,34 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs -o - %s | FileCheck %s
|
||||
; FIXME: pre-VI should have same ABI without legal i16 operations.
|
||||
|
||||
define void @void_func_empty_arg({} %arg0, i32 %arg1) #0 {
|
||||
; CHECK-LABEL: name: void_func_empty_arg
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY2]]
|
||||
store i32 %arg1, i32 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @void_func_empty_array([0 x i8] %arg0, i32 %arg1) #0 {
|
||||
; CHECK-LABEL: name: void_func_empty_array
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
|
||||
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
|
||||
; CHECK: S_SETPC_B64_return [[COPY2]]
|
||||
store i32 %arg1, i32 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @void_func_i1(i1 %arg0) #0 {
|
||||
; CHECK-LABEL: name: void_func_i1
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
|
|
Loading…
Reference in New Issue