AMDGPU/GlobalISel: Redo kernel argument load handling

This avoids relying on G_EXTRACT on unusual types and properly decomposes
structs into multiple registers. It also preserves the LLTs in the memory
operands.
Matt Arsenault 2021-07-15 14:23:06 -04:00
parent 231bf52119
commit 21a0ef8d19
3 changed files with 355 additions and 137 deletions
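
For readers skimming the diff, here is the new per-argument lowering loop from the AMDGPUCallLowering.cpp hunk below, restated in condensed form with indentation and explanatory comments added (the comments are editorial, not part of the patch):

void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, ArgInfo &OrigArg,
                                        uint64_t Offset,
                                        Align Alignment) const {
  MachineFunction &MF = B.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);

  // Split aggregates (e.g. structs) into one ArgInfo per legal piece,
  // recording each piece's byte offset within the original argument.
  SmallVector<ArgInfo, 32> SplitArgs;
  SmallVector<uint64_t> FieldOffsets;
  splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv(), &FieldOffsets);

  unsigned Idx = 0;
  for (ArgInfo &SplitArg : SplitArgs) {
    // One kernarg pointer and one typed G_LOAD per piece, instead of a
    // single wide load followed by G_EXTRACTs.
    Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);
    lowerParameterPtr(PtrReg, B, Offset + FieldOffsets[Idx]);

    // The MMO is built from the piece's LLT (e.g. <3 x s8>) rather than its
    // store size in bytes, so the memory type is preserved in the MIR.
    const LLT ArgTy = getLLTForType(*SplitArg.Ty, DL);
    MachineMemOperand *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        ArgTy, commonAlignment(Alignment, FieldOffsets[Idx]));

    assert(SplitArg.Regs.size() == 1);
    B.buildLoad(SplitArg.Regs[0], PtrReg, *MMO);
    ++Idx;
  }
}

The MIR test updates below show the effect: vector arguments now get loads annotated with their element LLTs (e.g. load (<2 x s16>) instead of load (s32)), and struct arguments produce one load per field instead of a wide load plus G_EXTRACTs.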

AMDGPUCallLowering.cpp

@ -384,7 +384,6 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
}
void AMDGPUCallLowering::lowerParameterPtr(Register DstReg, MachineIRBuilder &B,
Type *ParamTy,
uint64_t Offset) const {
MachineFunction &MF = B.getMF();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@ -398,26 +397,38 @@ void AMDGPUCallLowering::lowerParameterPtr(Register DstReg, MachineIRBuilder &B,
B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);
}
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, Type *ParamTy,
uint64_t Offset, Align Alignment,
Register DstReg) const {
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, ArgInfo &OrigArg,
uint64_t Offset,
Align Alignment) const {
MachineFunction &MF = B.getMF();
const Function &F = MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);
lowerParameterPtr(PtrReg, B, ParamTy, Offset);
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo,
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant,
TypeSize, Alignment);
SmallVector<ArgInfo, 32> SplitArgs;
SmallVector<uint64_t> FieldOffsets;
splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv(), &FieldOffsets);
B.buildLoad(DstReg, PtrReg, *MMO);
unsigned Idx = 0;
for (ArgInfo &SplitArg : SplitArgs) {
Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);
lowerParameterPtr(PtrReg, B, Offset + FieldOffsets[Idx]);
const LLT ArgTy = getLLTForType(*SplitArg.Ty, DL);
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo,
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant,
ArgTy, commonAlignment(Alignment, FieldOffsets[Idx]));
assert(SplitArg.Regs.size() == 1);
B.buildLoad(SplitArg.Regs[0], PtrReg, *MMO);
++Idx;
}
}
// Allocate special inputs passed in user SGPRs.
@ -523,24 +534,19 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
assert(VRegs[i].size() == 1 &&
"expected only one register for byval pointers");
if (ByRefAS == AMDGPUAS::CONSTANT_ADDRESS) {
lowerParameterPtr(VRegs[i][0], B, ArgTy, ArgOffset);
lowerParameterPtr(VRegs[i][0], B, ArgOffset);
} else {
const LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
Register PtrReg = MRI.createGenericVirtualRegister(ConstPtrTy);
lowerParameterPtr(PtrReg, B, ArgTy, ArgOffset);
lowerParameterPtr(PtrReg, B, ArgOffset);
B.buildAddrSpaceCast(VRegs[i][0], PtrReg);
}
} else {
ArrayRef<Register> OrigArgRegs = VRegs[i];
Register ArgReg =
OrigArgRegs.size() == 1
? OrigArgRegs[0]
: MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
lowerParameter(B, ArgTy, ArgOffset, Alignment, ArgReg);
if (OrigArgRegs.size() > 1)
unpackRegs(OrigArgRegs, ArgReg, ArgTy, B);
ArgInfo OrigArg(VRegs[i], Arg, i);
const unsigned OrigArgIdx = i + AttributeList::FirstArgIndex;
setArgFlags(OrigArg, OrigArgIdx, DL, F);
lowerParameter(B, OrigArg, ArgOffset, Alignment);
}
++i;

AMDGPUCallLowering.h

@ -24,11 +24,11 @@ class MachineInstrBuilder;
class SIMachineFunctionInfo;
class AMDGPUCallLowering final : public CallLowering {
void lowerParameterPtr(Register DstReg, MachineIRBuilder &B, Type *ParamTy,
void lowerParameterPtr(Register DstReg, MachineIRBuilder &B,
uint64_t Offset) const;
void lowerParameter(MachineIRBuilder &B, Type *ParamTy, uint64_t Offset,
Align Alignment, Register DstReg) const;
void lowerParameter(MachineIRBuilder &B, ArgInfo &AI, uint64_t Offset,
Align Alignment) const;
bool canLowerReturn(MachineFunction &MF, CallingConv::ID CallConv,
SmallVectorImpl<BaseArgInfo> &Outs,

GlobalISel IRTranslator kernel-argument test (updated MIR checks)

@ -264,7 +264,7 @@ define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 8, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v2i8_arg
@ -276,7 +276,7 @@ define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 4, addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -294,7 +294,7 @@ define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), align 8, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v2i16_arg
@ -306,7 +306,7 @@ define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -324,7 +324,7 @@ define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v2i32_arg
@ -336,7 +336,7 @@ define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -354,7 +354,7 @@ define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v2f32_arg
@ -366,7 +366,7 @@ define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -384,7 +384,7 @@ define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s24), align 8, addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 8, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v3i8_arg
@ -396,7 +396,7 @@ define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s24), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 4, addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -414,7 +414,7 @@ define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s48), align 8, addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 8, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v3i16_arg
@ -426,7 +426,7 @@ define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s48), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 4, addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -444,7 +444,7 @@ define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s96), align 16, addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v3i32_arg
@ -456,7 +456,7 @@ define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s96), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -474,7 +474,7 @@ define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s96), align 16, addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v3f32_arg
@ -486,7 +486,7 @@ define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s96), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -504,7 +504,7 @@ define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), align 8, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v4i8_arg
@ -516,7 +516,7 @@ define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -534,7 +534,7 @@ define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v4i16_arg
@ -546,7 +546,7 @@ define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), align 4, addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -564,7 +564,7 @@ define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v4i32_arg
@ -576,7 +576,7 @@ define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -594,7 +594,7 @@ define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v4f32_arg
@ -606,7 +606,7 @@ define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -624,7 +624,7 @@ define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v8i8_arg
@ -636,7 +636,7 @@ define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), align 4, addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -654,7 +654,7 @@ define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v8i16_arg
@ -666,7 +666,7 @@ define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), align 4, addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -684,7 +684,7 @@ define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s256), align 16, addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v8i32_arg
@ -696,7 +696,7 @@ define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s256), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -714,7 +714,7 @@ define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s256), align 16, addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v8f32_arg
@ -726,7 +726,7 @@ define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s256), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -744,7 +744,7 @@ define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v16i8_arg
@ -756,7 +756,7 @@ define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), align 4, addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -774,7 +774,7 @@ define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16>
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s256), align 16, addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 16, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v16i16_arg
@ -786,7 +786,7 @@ define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16>
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s256), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 4, addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -804,7 +804,7 @@ define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s512), align 16, addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v16i32_arg
@ -816,7 +816,7 @@ define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s512), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -834,7 +834,7 @@ define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out,
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s512), align 16, addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v16f32_arg
@ -846,7 +846,7 @@ define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out,
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s512), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
entry:
@ -923,7 +923,7 @@ define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
; HSA-VI: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: i1_arg
@ -935,7 +935,7 @@ define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
store i1 %x, i1 addrspace(1)* %out, align 1
@ -952,7 +952,7 @@ define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwi
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1)
; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM 0
@ -965,7 +965,7 @@ define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwi
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1)
; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
@ -984,7 +984,7 @@ define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwi
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
; HSA-VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1)
; HSA-VI: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM 0
@ -997,7 +997,7 @@ define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwi
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1)
; LEGACY-MESA-VI: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
@ -1016,7 +1016,7 @@ define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwi
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
; HSA-VI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1)
; HSA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM 0
@ -1029,7 +1029,7 @@ define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwi
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
; LEGACY-MESA-VI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1)
; LEGACY-MESA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
@ -1048,7 +1048,7 @@ define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwi
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
; HSA-VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1)
; HSA-VI: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
; HSA-VI: S_ENDPGM 0
@ -1061,7 +1061,7 @@ define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwi
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
; LEGACY-MESA-VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1)
; LEGACY-MESA-VI: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
@ -1137,25 +1137,27 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad,
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s128), addrspace 4)
; HSA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s128), 0
; HSA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s128), 64
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4)
; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
; HSA-VI: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s128), align 8, addrspace 4)
; HSA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](s128), 0
; HSA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s128), 64
; HSA-VI: [[C3:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C3]](p1)
; HSA-VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C3]](p1)
; HSA-VI: G_STORE [[EXTRACT]](s32), [[C3]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[LOAD1]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[EXTRACT2]](s32), [[C3]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
; HSA-VI: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4)
; HSA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
; HSA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
; HSA-VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
; HSA-VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; HSA-VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
; HSA-VI: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
; HSA-VI: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
; HSA-VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
; HSA-VI: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[LOAD4]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: struct_argument_alignment
; LEGACY-MESA-VI: bb.1 (%ir-block.0):
@ -1163,25 +1165,27 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad,
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s128), align 4, addrspace 4)
; LEGACY-MESA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s128), 0
; LEGACY-MESA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s128), 64
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 60
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s128), align 4, addrspace 4)
; LEGACY-MESA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](s128), 0
; LEGACY-MESA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s128), 64
; LEGACY-MESA-VI: [[C3:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C3]](p1)
; LEGACY-MESA-VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C3]](p1)
; LEGACY-MESA-VI: G_STORE [[EXTRACT]](s32), [[C3]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[EXTRACT2]](s32), [[C3]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60
; LEGACY-MESA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
; LEGACY-MESA-VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
; LEGACY-MESA-VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
; LEGACY-MESA-VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
; LEGACY-MESA-VI: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
; LEGACY-MESA-VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[LOAD4]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
%val0 = extractvalue {i32, i64} %arg0, 0
%val1 = extractvalue {i32, i64} %arg0, 1
@ -1195,6 +1199,77 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad,
ret void
}
define amdgpu_kernel void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %arg0, i8 %pad, {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1) {
; HSA-VI-LABEL: name: pointer_in_struct_argument
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
; HSA-VI: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4)
; HSA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
; HSA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
; HSA-VI: [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
; HSA-VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; HSA-VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
; HSA-VI: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
; HSA-VI: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
; HSA-VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
; HSA-VI: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
; HSA-VI: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[LOAD1]](p1), [[COPY1]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[LOAD4]](p1234), [[COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: pointer_in_struct_argument
; LEGACY-MESA-VI: bb.1 (%ir-block.0):
; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60
; LEGACY-MESA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
; LEGACY-MESA-VI: [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
; LEGACY-MESA-VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
; LEGACY-MESA-VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
; LEGACY-MESA-VI: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
; LEGACY-MESA-VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
; LEGACY-MESA-VI: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
; LEGACY-MESA-VI: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](p1), [[COPY1]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[LOAD4]](p1234), [[COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
%val0 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 0
%val1 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 1
%val2 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 0
%val3 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 1
store volatile i8 addrspace(3)* %val0, i8 addrspace(3)* addrspace(1)* null
store volatile i8 addrspace(1)* %val1, i8 addrspace(1)* addrspace(1)* null
store volatile i8 %pad, i8 addrspace(1)* null
store volatile i8 addrspace(3)* %val2, i8 addrspace(3)* addrspace(1)* null
store volatile i8 addrspace(1234)* %val3, i8 addrspace(1234)* addrspace(1)* null
ret void
}
; No padding between the i8 and the next struct, but the total kernarg size is
; rounded up at the end to a 4 byte multiple (the resulting offsets are spelled
; out after the checks below).
define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) {
@ -1204,20 +1279,22 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0,
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s96), align 16, addrspace 4)
; HSA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s96), 0
; HSA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 13
; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s96), align 1, addrspace 4)
; HSA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s96), 0
; HSA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD1]](s96), 32
; HSA-VI: [[C2:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C2]](p1)
; HSA-VI: G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[EXTRACT2]](s32), [[C2]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 13
; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4)
; HSA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 17
; HSA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
; HSA-VI: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4)
; HSA-VI: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C4]](p1)
; HSA-VI: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: packed_struct_argument_alignment
; LEGACY-MESA-VI: bb.1 (%ir-block.1):
@ -1225,20 +1302,22 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0,
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s96), align 4, addrspace 4)
; LEGACY-MESA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s96), 0
; LEGACY-MESA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 49
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s96), align 1, addrspace 4)
; LEGACY-MESA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s96), 0
; LEGACY-MESA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD1]](s96), 32
; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C2]](p1)
; LEGACY-MESA-VI: G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[EXTRACT2]](s32), [[C2]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 49
; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4)
; LEGACY-MESA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 53
; LEGACY-MESA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
; LEGACY-MESA-VI: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4)
; LEGACY-MESA-VI: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C4]](p1)
; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
%val0 = extractvalue <{i32, i64}> %arg0, 0
%val1 = extractvalue <{i32, i64}> %arg0, 1
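; For reference, the offsets checked above follow directly from the packed
; layout: %arg0 = <{i32, i64}> occupies bytes 0-11 (fields loaded at 0 and 4),
; the unnamed i8 sits at byte 12, and %arg1 starts unpadded at byte 13 (fields
; loaded at 13 and 17). The 25 bytes of explicit arguments therefore round up
; to a 28 byte kernarg segment, and the LEGACY-MESA-VI checks apply the same
; layout at a 36 byte base offset, giving 36, 40, 49 and 53.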
@ -1729,3 +1808,136 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byre
store i32 %in, i32 addrspace(1)* undef, align 4
ret void
}
define amdgpu_kernel void @p3i8_arg(i8 addrspace(3)* %arg) nounwind {
; HSA-VI-LABEL: name: p3i8_arg
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
; HSA-VI: G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: p3i8_arg
; LEGACY-MESA-VI: bb.1 (%ir-block.0):
; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
; LEGACY-MESA-VI: G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3)
; LEGACY-MESA-VI: S_ENDPGM 0
store i8 9, i8 addrspace(3)* %arg, align 4
ret void
}
define amdgpu_kernel void @p1i8_arg(i8 addrspace(1)* %arg) nounwind {
; HSA-VI-LABEL: name: p1i8_arg
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
; HSA-VI: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0
; HSA-VI: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `i8 addrspace(3)* null`, addrspace 3)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: p1i8_arg
; LEGACY-MESA-VI: bb.1 (%ir-block.0):
; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0
; LEGACY-MESA-VI: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `i8 addrspace(3)* null`, addrspace 3)
; LEGACY-MESA-VI: S_ENDPGM 0
store i8 9, i8 addrspace(3)* null
ret void
}
define amdgpu_kernel void @v2p1i8_arg(<2 x i8 addrspace(1)*> %arg) nounwind {
; HSA-VI-LABEL: name: v2p1i8_arg
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), addrspace 4)
; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; HSA-VI: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v2p1i8_arg
; LEGACY-MESA-VI: bb.1 (%ir-block.0):
; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4)
; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; LEGACY-MESA-VI: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
store <2 x i8 addrspace(1)*> %arg, <2 x i8 addrspace(1)*> addrspace(1)* undef
ret void
}
define amdgpu_kernel void @v2p3i8_arg(<2 x i8 addrspace(3)*> %arg) nounwind {
; HSA-VI-LABEL: name: v2p3i8_arg
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s32>), align 16, addrspace 4)
; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; HSA-VI: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v2p3i8_arg
; LEGACY-MESA-VI: bb.1 (%ir-block.0):
; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; LEGACY-MESA-VI: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
store <2 x i8 addrspace(3)*> %arg, <2 x i8 addrspace(3)*> addrspace(1)* undef
ret void
}
define amdgpu_kernel void @v2p1i8_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg) nounwind {
; HSA-VI-LABEL: name: v2p1i8_in_struct_arg
; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; HSA-VI: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), addrspace 4)
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 16, addrspace 4)
; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; HSA-VI: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1)
; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
; HSA-VI: G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: v2p1i8_in_struct_arg
; LEGACY-MESA-VI: bb.1 (%ir-block.0):
; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4)
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; LEGACY-MESA-VI: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1)
; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
store { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg, { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef
ret void
}