forked from OSchip/llvm-project
AMDGPU/GlobalISel: Redo kernel argument load handling
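Kernel arguments are now split into their component value types, and each piece is loaded directly from the kernarg segment at its own field offset, instead of loading the whole argument into a single register and unpacking it afterwards. This avoids relying on G_EXTRACT on unusual types, and also properly decomposes structs into multiple registers. This also preserves the LLTs in the memory operands: a load's memory operand now records the type of the loaded value (for example `<2 x s8>`) rather than only a scalar of the same size (`s16`).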
This commit is contained in:
parent
231bf52119
commit
21a0ef8d19
|
@ -384,7 +384,6 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
|
|||
}
|
||||
|
||||
void AMDGPUCallLowering::lowerParameterPtr(Register DstReg, MachineIRBuilder &B,
|
||||
Type *ParamTy,
|
||||
uint64_t Offset) const {
|
||||
MachineFunction &MF = B.getMF();
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
@ -398,26 +397,38 @@ void AMDGPUCallLowering::lowerParameterPtr(Register DstReg, MachineIRBuilder &B,
|
|||
B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);
|
||||
}
|
||||
|
||||
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, Type *ParamTy,
|
||||
uint64_t Offset, Align Alignment,
|
||||
Register DstReg) const {
|
||||
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, ArgInfo &OrigArg,
|
||||
uint64_t Offset,
|
||||
Align Alignment) const {
|
||||
MachineFunction &MF = B.getMF();
|
||||
const Function &F = MF.getFunction();
|
||||
const DataLayout &DL = F.getParent()->getDataLayout();
|
||||
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
|
||||
unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
|
||||
|
||||
LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
|
||||
Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);
|
||||
lowerParameterPtr(PtrReg, B, ParamTy, Offset);
|
||||
|
||||
MachineMemOperand *MMO = MF.getMachineMemOperand(
|
||||
PtrInfo,
|
||||
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
|
||||
MachineMemOperand::MOInvariant,
|
||||
TypeSize, Alignment);
|
||||
SmallVector<ArgInfo, 32> SplitArgs;
|
||||
SmallVector<uint64_t> FieldOffsets;
|
||||
splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv(), &FieldOffsets);
|
||||
|
||||
B.buildLoad(DstReg, PtrReg, *MMO);
|
||||
unsigned Idx = 0;
|
||||
for (ArgInfo &SplitArg : SplitArgs) {
|
||||
Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);
|
||||
lowerParameterPtr(PtrReg, B, Offset + FieldOffsets[Idx]);
|
||||
|
||||
const LLT ArgTy = getLLTForType(*SplitArg.Ty, DL);
|
||||
|
||||
MachineMemOperand *MMO = MF.getMachineMemOperand(
|
||||
PtrInfo,
|
||||
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
|
||||
MachineMemOperand::MOInvariant,
|
||||
ArgTy, commonAlignment(Alignment, FieldOffsets[Idx]));
|
||||
|
||||
assert(SplitArg.Regs.size() == 1);
|
||||
|
||||
B.buildLoad(SplitArg.Regs[0], PtrReg, *MMO);
|
||||
++Idx;
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate special inputs passed in user SGPRs.
|
||||
|
@ -523,24 +534,19 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
|
|||
assert(VRegs[i].size() == 1 &&
|
||||
"expected only one register for byval pointers");
|
||||
if (ByRefAS == AMDGPUAS::CONSTANT_ADDRESS) {
|
||||
lowerParameterPtr(VRegs[i][0], B, ArgTy, ArgOffset);
|
||||
lowerParameterPtr(VRegs[i][0], B, ArgOffset);
|
||||
} else {
|
||||
const LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
|
||||
Register PtrReg = MRI.createGenericVirtualRegister(ConstPtrTy);
|
||||
lowerParameterPtr(PtrReg, B, ArgTy, ArgOffset);
|
||||
lowerParameterPtr(PtrReg, B, ArgOffset);
|
||||
|
||||
B.buildAddrSpaceCast(VRegs[i][0], PtrReg);
|
||||
}
|
||||
} else {
|
||||
ArrayRef<Register> OrigArgRegs = VRegs[i];
|
||||
Register ArgReg =
|
||||
OrigArgRegs.size() == 1
|
||||
? OrigArgRegs[0]
|
||||
: MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
|
||||
|
||||
lowerParameter(B, ArgTy, ArgOffset, Alignment, ArgReg);
|
||||
if (OrigArgRegs.size() > 1)
|
||||
unpackRegs(OrigArgRegs, ArgReg, ArgTy, B);
|
||||
ArgInfo OrigArg(VRegs[i], Arg, i);
|
||||
const unsigned OrigArgIdx = i + AttributeList::FirstArgIndex;
|
||||
setArgFlags(OrigArg, OrigArgIdx, DL, F);
|
||||
lowerParameter(B, OrigArg, ArgOffset, Alignment);
|
||||
}
|
||||
|
||||
++i;
|
||||
|
|
|
@ -24,11 +24,11 @@ class MachineInstrBuilder;
|
|||
class SIMachineFunctionInfo;
|
||||
|
||||
class AMDGPUCallLowering final : public CallLowering {
|
||||
void lowerParameterPtr(Register DstReg, MachineIRBuilder &B, Type *ParamTy,
|
||||
void lowerParameterPtr(Register DstReg, MachineIRBuilder &B,
|
||||
uint64_t Offset) const;
|
||||
|
||||
void lowerParameter(MachineIRBuilder &B, Type *ParamTy, uint64_t Offset,
|
||||
Align Alignment, Register DstReg) const;
|
||||
void lowerParameter(MachineIRBuilder &B, ArgInfo &AI, uint64_t Offset,
|
||||
Align Alignment) const;
|
||||
|
||||
bool canLowerReturn(MachineFunction &MF, CallingConv::ID CallConv,
|
||||
SmallVectorImpl<BaseArgInfo> &Outs,
|
||||
|
|
|
@ -264,7 +264,7 @@ define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 8, addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v2i8_arg
|
||||
|
@ -276,7 +276,7 @@ define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -294,7 +294,7 @@ define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), align 8, addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v2i16_arg
|
||||
|
@ -306,7 +306,7 @@ define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -324,7 +324,7 @@ define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v2i32_arg
|
||||
|
@ -336,7 +336,7 @@ define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -354,7 +354,7 @@ define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v2f32_arg
|
||||
|
@ -366,7 +366,7 @@ define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -384,7 +384,7 @@ define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s24), align 8, addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 8, addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v3i8_arg
|
||||
|
@ -396,7 +396,7 @@ define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s24), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -414,7 +414,7 @@ define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s48), align 8, addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 8, addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v3i16_arg
|
||||
|
@ -426,7 +426,7 @@ define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s48), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -444,7 +444,7 @@ define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s96), align 16, addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v3i32_arg
|
||||
|
@ -456,7 +456,7 @@ define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s96), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -474,7 +474,7 @@ define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s96), align 16, addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v3f32_arg
|
||||
|
@ -486,7 +486,7 @@ define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s96), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -504,7 +504,7 @@ define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), align 8, addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v4i8_arg
|
||||
|
@ -516,7 +516,7 @@ define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -534,7 +534,7 @@ define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v4i16_arg
|
||||
|
@ -546,7 +546,7 @@ define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -564,7 +564,7 @@ define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v4i32_arg
|
||||
|
@ -576,7 +576,7 @@ define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -594,7 +594,7 @@ define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v4f32_arg
|
||||
|
@ -606,7 +606,7 @@ define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -624,7 +624,7 @@ define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v8i8_arg
|
||||
|
@ -636,7 +636,7 @@ define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -654,7 +654,7 @@ define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v8i16_arg
|
||||
|
@ -666,7 +666,7 @@ define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -684,7 +684,7 @@ define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s256), align 16, addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v8i32_arg
|
||||
|
@ -696,7 +696,7 @@ define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s256), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -714,7 +714,7 @@ define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s256), align 16, addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v8f32_arg
|
||||
|
@ -726,7 +726,7 @@ define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s256), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -744,7 +744,7 @@ define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v16i8_arg
|
||||
|
@ -756,7 +756,7 @@ define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -774,7 +774,7 @@ define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16>
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s256), align 16, addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 16, addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v16i16_arg
|
||||
|
@ -786,7 +786,7 @@ define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16>
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s256), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -804,7 +804,7 @@ define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s512), align 16, addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v16i32_arg
|
||||
|
@ -816,7 +816,7 @@ define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s512), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -834,7 +834,7 @@ define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out,
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s512), align 16, addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v16f32_arg
|
||||
|
@ -846,7 +846,7 @@ define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out,
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s512), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
entry:
|
||||
|
@ -923,7 +923,7 @@ define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
|
||||
; HSA-VI: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: i1_arg
|
||||
|
@ -935,7 +935,7 @@ define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
store i1 %x, i1 addrspace(1)* %out, align 1
|
||||
|
@ -952,7 +952,7 @@ define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwi
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
|
||||
; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1)
|
||||
; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
|
@ -965,7 +965,7 @@ define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwi
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1)
|
||||
; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
|
@ -984,7 +984,7 @@ define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwi
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
|
||||
; HSA-VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1)
|
||||
; HSA-VI: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
|
@ -997,7 +997,7 @@ define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwi
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1)
|
||||
; LEGACY-MESA-VI: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
|
@ -1016,7 +1016,7 @@ define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwi
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
|
||||
; HSA-VI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1)
|
||||
; HSA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
|
@ -1029,7 +1029,7 @@ define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwi
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1)
|
||||
; LEGACY-MESA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
|
@ -1048,7 +1048,7 @@ define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwi
|
|||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
|
||||
; HSA-VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1)
|
||||
; HSA-VI: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
|
@ -1061,7 +1061,7 @@ define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwi
|
|||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1)
|
||||
; LEGACY-MESA-VI: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
|
@ -1137,25 +1137,27 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad,
|
|||
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
|
||||
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
|
||||
; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; HSA-VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s128), addrspace 4)
|
||||
; HSA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s128), 0
|
||||
; HSA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s128), 64
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4)
|
||||
; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
|
||||
; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
|
||||
; HSA-VI: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s128), align 8, addrspace 4)
|
||||
; HSA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](s128), 0
|
||||
; HSA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s128), 64
|
||||
; HSA-VI: [[C3:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
|
||||
; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C3]](p1)
|
||||
; HSA-VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C3]](p1)
|
||||
; HSA-VI: G_STORE [[EXTRACT]](s32), [[C3]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[LOAD1]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[EXTRACT2]](s32), [[C3]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4)
|
||||
; HSA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
|
||||
; HSA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
|
||||
; HSA-VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
|
||||
; HSA-VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
|
||||
; HSA-VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
|
||||
; HSA-VI: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
|
||||
; HSA-VI: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
|
||||
; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
|
||||
; HSA-VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
|
||||
; HSA-VI: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[LOAD4]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: struct_argument_alignment
|
||||
; LEGACY-MESA-VI: bb.1 (%ir-block.0):
|
||||
|
@ -1163,25 +1165,27 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad,
|
|||
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
|
||||
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
|
||||
; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s128), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s128), 0
|
||||
; LEGACY-MESA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s128), 64
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
|
||||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 60
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
|
||||
; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s128), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](s128), 0
|
||||
; LEGACY-MESA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s128), 64
|
||||
; LEGACY-MESA-VI: [[C3:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
|
||||
; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C3]](p1)
|
||||
; LEGACY-MESA-VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C3]](p1)
|
||||
; LEGACY-MESA-VI: G_STORE [[EXTRACT]](s32), [[C3]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[EXTRACT2]](s32), [[C3]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60
|
||||
; LEGACY-MESA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
|
||||
; LEGACY-MESA-VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
|
||||
; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
|
||||
; LEGACY-MESA-VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD4]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
%val0 = extractvalue {i32, i64} %arg0, 0
|
||||
%val1 = extractvalue {i32, i64} %arg0, 1
|
||||
|
@ -1195,6 +1199,77 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad,
|
|||
ret void
|
||||
}
|
||||
|
||||
; Kernel taking two struct arguments whose fields are pointers in different
; address spaces (3 and 1 for %arg0; 3 and 1234 for %arg1), with an i8 %pad
; argument between them.
; The checks below expect each struct field, and %pad, to be fetched by its
; own G_LOAD from the kernarg pointer, with the loaded value keeping its
; pointer type (p3, p1, p1234). Expected offsets are 0, 8 (%arg0), 16 (%pad),
; 24, 32 (%arg1) in the HSA-VI run and 36, 44, 52, 60, 68 in the
; LEGACY-MESA-VI run.
define amdgpu_kernel void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %arg0, i8 %pad, {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1) {
|
||||
; HSA-VI-LABEL: name: pointer_in_struct_argument
|
||||
; HSA-VI: bb.1 (%ir-block.0):
|
||||
; HSA-VI: liveins: $sgpr4_sgpr5
|
||||
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
|
||||
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
|
||||
; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
|
||||
; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
|
||||
; HSA-VI: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4)
|
||||
; HSA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
|
||||
; HSA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
|
||||
; HSA-VI: [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
|
||||
; HSA-VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
|
||||
; HSA-VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
|
||||
; HSA-VI: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
|
||||
; HSA-VI: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
|
||||
; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
|
||||
; HSA-VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
|
||||
; HSA-VI: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
|
||||
; HSA-VI: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[LOAD1]](p1), [[COPY1]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[LOAD4]](p1234), [[COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: pointer_in_struct_argument
|
||||
; LEGACY-MESA-VI: bb.1 (%ir-block.0):
|
||||
; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1
|
||||
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
|
||||
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
|
||||
; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
|
||||
; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60
|
||||
; LEGACY-MESA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
|
||||
; LEGACY-MESA-VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
|
||||
; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
|
||||
; LEGACY-MESA-VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
|
||||
; LEGACY-MESA-VI: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C5]](p1)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](p1), [[COPY1]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD4]](p1234), [[COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
; Pull each pointer field out of both structs, then write every field and
; %pad with a volatile store to a null addrspace(1) pointer.
%val0 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 0
|
||||
%val1 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 1
|
||||
%val2 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 0
|
||||
%val3 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 1
|
||||
store volatile i8 addrspace(3)* %val0, i8 addrspace(3)* addrspace(1)* null
|
||||
store volatile i8 addrspace(1)* %val1, i8 addrspace(1)* addrspace(1)* null
|
||||
store volatile i8 %pad, i8 addrspace(1)* null
|
||||
store volatile i8 addrspace(3)* %val2, i8 addrspace(3)* addrspace(1)* null
|
||||
store volatile i8 addrspace(1234)* %val3, i8 addrspace(1234)* addrspace(1)* null
|
||||
ret void
|
||||
}
|
||||
|
||||
; No padding between the i8 and the next struct, but round up at the end to a
|
||||
; 4-byte multiple.
|
||||
define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) {
|
||||
|
@ -1204,20 +1279,22 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0,
|
|||
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
|
||||
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
|
||||
; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; HSA-VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s96), align 16, addrspace 4)
|
||||
; HSA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s96), 0
|
||||
; HSA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 13
|
||||
; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s96), align 1, addrspace 4)
|
||||
; HSA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s96), 0
|
||||
; HSA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD1]](s96), 32
|
||||
; HSA-VI: [[C2:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
|
||||
; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C2]](p1)
|
||||
; HSA-VI: G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[EXTRACT2]](s32), [[C2]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 13
|
||||
; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
|
||||
; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4)
|
||||
; HSA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 17
|
||||
; HSA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
|
||||
; HSA-VI: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4)
|
||||
; HSA-VI: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
|
||||
; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C4]](p1)
|
||||
; HSA-VI: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: packed_struct_argument_alignment
|
||||
; LEGACY-MESA-VI: bb.1 (%ir-block.1):
|
||||
|
@ -1225,20 +1302,22 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0,
|
|||
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
|
||||
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
|
||||
; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s96), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s96), 0
|
||||
; LEGACY-MESA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 49
|
||||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s96), align 1, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s96), 0
|
||||
; LEGACY-MESA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD1]](s96), 32
|
||||
; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
|
||||
; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C2]](p1)
|
||||
; LEGACY-MESA-VI: G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[EXTRACT2]](s32), [[C2]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 49
|
||||
; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 53
|
||||
; LEGACY-MESA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
|
||||
; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C4]](p1)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
%val0 = extractvalue <{i32, i64}> %arg0, 0
|
||||
%val1 = extractvalue <{i32, i64}> %arg0, 1
|
||||
|
@ -1729,3 +1808,136 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byre
|
|||
store i32 %in, i32 addrspace(1)* undef, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Kernel taking a single addrspace(3) pointer argument that is used as the
; address of an i8 store. Both check sets expect the argument to be read from
; the kernarg base pointer with one G_PTR_ADD + G_LOAD that produces a p3 value
; directly, with an s32 memory type on the load. The HSA checks use offset 0
; with align 16; the legacy Mesa checks use offset 36 with no explicit align.
define amdgpu_kernel void @p3i8_arg(i8 addrspace(3)* %arg) nounwind {
|
||||
; HSA-VI-LABEL: name: p3i8_arg
|
||||
; HSA-VI: bb.1 (%ir-block.0):
|
||||
; HSA-VI: liveins: $sgpr4_sgpr5
|
||||
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
|
||||
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
|
||||
; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; HSA-VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
|
||||
; HSA-VI: G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: p3i8_arg
|
||||
; LEGACY-MESA-VI: bb.1 (%ir-block.0):
|
||||
; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1
|
||||
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
|
||||
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
|
||||
; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
|
||||
; LEGACY-MESA-VI: G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
store i8 9, i8 addrspace(3)* %arg, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Kernel declaring a single addrspace(1) pointer argument. The body never
; references %arg: it stores i8 9 through a null addrspace(3) pointer. As a
; result both check sets contain only the COPY of the kernarg base pointer and
; no G_PTR_ADD / G_LOAD for the argument.
; NOTE(review): if this test is meant to cover loading a p1 kernel argument
; (by analogy with the neighbouring p3 test), the store should presumably go
; through %arg and the checks be regenerated -- confirm the intent.
define amdgpu_kernel void @p1i8_arg(i8 addrspace(1)* %arg) nounwind {
|
||||
; HSA-VI-LABEL: name: p1i8_arg
|
||||
; HSA-VI: bb.1 (%ir-block.0):
|
||||
; HSA-VI: liveins: $sgpr4_sgpr5
|
||||
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
|
||||
; HSA-VI: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0
|
||||
; HSA-VI: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `i8 addrspace(3)* null`, addrspace 3)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: p1i8_arg
|
||||
; LEGACY-MESA-VI: bb.1 (%ir-block.0):
|
||||
; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1
|
||||
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
|
||||
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0
|
||||
; LEGACY-MESA-VI: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `i8 addrspace(3)* null`, addrspace 3)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
store i8 9, i8 addrspace(3)* null
|
||||
ret void
|
||||
}
|
||||
|
||||
; Kernel taking a two-element vector of addrspace(1) pointers and storing the
; whole vector to an undef addrspace(1) address. Both check sets expect the
; argument to arrive as a single G_LOAD whose result type is <2 x p1> and whose
; memory type is <2 x s64>. The HSA checks use kernarg offset 0 with no
; explicit align; the legacy Mesa checks use offset 36 with align 4.
define amdgpu_kernel void @v2p1i8_arg(<2 x i8 addrspace(1)*> %arg) nounwind {
|
||||
; HSA-VI-LABEL: name: v2p1i8_arg
|
||||
; HSA-VI: bb.1 (%ir-block.0):
|
||||
; HSA-VI: liveins: $sgpr4_sgpr5
|
||||
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
|
||||
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
|
||||
; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; HSA-VI: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), addrspace 4)
|
||||
; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; HSA-VI: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v2p1i8_arg
|
||||
; LEGACY-MESA-VI: bb.1 (%ir-block.0):
|
||||
; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1
|
||||
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
|
||||
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
|
||||
; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
store <2 x i8 addrspace(1)*> %arg, <2 x i8 addrspace(1)*> addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; Kernel taking a two-element vector of addrspace(3) pointers and storing the
; whole vector to an undef addrspace(1) address. Both check sets expect the
; argument to arrive as a single G_LOAD whose result type is <2 x p3> and whose
; memory type is <2 x s32>. The HSA checks use kernarg offset 0 with align 16;
; the legacy Mesa checks use offset 36 with align 4.
define amdgpu_kernel void @v2p3i8_arg(<2 x i8 addrspace(3)*> %arg) nounwind {
|
||||
; HSA-VI-LABEL: name: v2p3i8_arg
|
||||
; HSA-VI: bb.1 (%ir-block.0):
|
||||
; HSA-VI: liveins: $sgpr4_sgpr5
|
||||
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
|
||||
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
|
||||
; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; HSA-VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s32>), align 16, addrspace 4)
|
||||
; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; HSA-VI: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v2p3i8_arg
|
||||
; LEGACY-MESA-VI: bb.1 (%ir-block.0):
|
||||
; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1
|
||||
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
|
||||
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
|
||||
; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
store <2 x i8 addrspace(3)*> %arg, <2 x i8 addrspace(3)*> addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; Kernel taking a struct argument whose two fields are pointer vectors
; (<2 x p1> followed by <2 x p3>), stored as a whole to an undef addrspace(1)
; address. Both check sets expect the struct to be split into one G_LOAD per
; field rather than a single wide load: field 0 at kernarg offset 0 (HSA) or
; 36 (legacy Mesa), field 1 sixteen bytes later at offset 16 or 52. The IR
; store is likewise expected as two G_STOREs, the second at base + 16.
define amdgpu_kernel void @v2p1i8_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg) nounwind {
|
||||
; HSA-VI-LABEL: name: v2p1i8_in_struct_arg
|
||||
; HSA-VI: bb.1 (%ir-block.0):
|
||||
; HSA-VI: liveins: $sgpr4_sgpr5
|
||||
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
|
||||
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
|
||||
; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; HSA-VI: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), addrspace 4)
|
||||
; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 16, addrspace 4)
|
||||
; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; HSA-VI: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1)
|
||||
; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
|
||||
; HSA-VI: G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1)
|
||||
; HSA-VI: S_ENDPGM 0
|
||||
; LEGACY-MESA-VI-LABEL: name: v2p1i8_in_struct_arg
|
||||
; LEGACY-MESA-VI: bb.1 (%ir-block.0):
|
||||
; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1
|
||||
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
|
||||
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
|
||||
; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
|
||||
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
|
||||
; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1)
|
||||
; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
|
||||
; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1)
|
||||
; LEGACY-MESA-VI: S_ENDPGM 0
|
||||
store { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg, { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue