GlobalISel: Use LLT in call lowering callbacks

This preserves the memory type so the lowerings can rely on it.
Matt Arsenault 2021-06-10 17:31:30 -04:00
parent 2668727929
commit 99c7e918b5
14 changed files with 80 additions and 94 deletions
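
For illustration, a minimal sketch of what a target-side handler override looks like once the callbacks carry an LLT memory type instead of a raw byte size. It mirrors the M68k/X86 incoming handlers touched below; the SketchIncomingValueHandler name and the surrounding boilerplate are illustrative only and not part of this commit.

#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFunction.h"

using namespace llvm;

namespace {
// Illustrative only: assignValueToAddress now receives the memory type
// (LLT MemTy) rather than a uint64_t size, so the MachineMemOperand can be
// built with the exact type that is loaded from the stack slot.
// The other pure-virtual hooks (getStackAddress, assignValueToReg) are
// omitted here, so this class stays abstract.
struct SketchIncomingValueHandler : public CallLowering::IncomingValueHandler {
  using CallLowering::IncomingValueHandler::IncomingValueHandler;

  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            MachinePointerInfo &MPO,
                            CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();
    // Build the MMO directly from the preserved memory type.
    auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, MemTy,
                                        inferAlignFromPtrInfo(MF, MPO));
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }
};
} // end anonymous namespace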

View File

@@ -236,7 +236,7 @@ public:
/// direct SP manipulation, depending on the context. \p MPO
/// should be initialized to an appropriate description of the
/// address created.
virtual Register getStackAddress(uint64_t Size, int64_t Offset,
virtual Register getStackAddress(uint64_t MemSize, int64_t Offset,
MachinePointerInfo &MPO,
ISD::ArgFlagsTy Flags) = 0;
@@ -245,8 +245,8 @@ public:
///
/// This is overridable primarily for targets to maintain compatibility with
/// hacks around the existing DAG call lowering infrastructure.
virtual uint64_t getStackValueStoreSize(const DataLayout &DL,
const CCValAssign &VA) const;
virtual LLT getStackValueStoreType(const DataLayout &DL,
const CCValAssign &VA) const;
/// The specified value has been assigned to a physical register,
/// handle the appropriate COPY (either to or from) and mark any
@@ -258,17 +258,17 @@ public:
/// location. Load or store it there, with appropriate extension
/// if necessary.
virtual void assignValueToAddress(Register ValVReg, Register Addr,
uint64_t Size, MachinePointerInfo &MPO,
LLT MemTy, MachinePointerInfo &MPO,
CCValAssign &VA) = 0;
/// An overload which takes an ArgInfo if additional information about the
/// arg is needed. \p ValRegIndex is the index in \p Arg.Regs for the value
/// to store.
virtual void assignValueToAddress(const ArgInfo &Arg, unsigned ValRegIndex,
Register Addr, uint64_t Size,
Register Addr, LLT MemTy,
MachinePointerInfo &MPO,
CCValAssign &VA) {
assignValueToAddress(Arg.Regs[ValRegIndex], Addr, Size, MPO, VA);
assignValueToAddress(Arg.Regs[ValRegIndex], Addr, MemTy, MPO, VA);
}
/// Handle custom values, which may be passed into one or more of \p VAs.

View File

@@ -682,14 +682,13 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// TODO: The memory size may be larger than the value we need to
// store. We may need to adjust the offset for big endian targets.
uint64_t MemSize = Handler.getStackValueStoreSize(DL, VA);
LLT MemTy = Handler.getStackValueStoreType(DL, VA);
MachinePointerInfo MPO;
Register StackAddr =
Handler.getStackAddress(MemSize, VA.getLocMemOffset(), MPO, Flags);
Register StackAddr = Handler.getStackAddress(
MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
Handler.assignValueToAddress(Args[i], Part, StackAddr, MemSize, MPO,
VA);
Handler.assignValueToAddress(Args[i], Part, StackAddr, MemTy, MPO, VA);
continue;
}
@@ -1016,14 +1015,14 @@ bool CallLowering::resultsCompatible(CallLoweringInfo &Info,
return true;
}
uint64_t CallLowering::ValueHandler::getStackValueStoreSize(
LLT CallLowering::ValueHandler::getStackValueStoreType(
const DataLayout &DL, const CCValAssign &VA) const {
const EVT ValVT = VA.getValVT();
const MVT ValVT = VA.getValVT();
if (ValVT != MVT::iPTR)
return ValVT.getStoreSize();
return LLT(ValVT);
/// FIXME: We need to get the correct pointer address space.
return DL.getPointerSize();
return LLT::pointer(0, DL.getPointerSize(0));
}
void CallLowering::ValueHandler::copyArgumentMemory(

View File

@@ -66,10 +66,10 @@ static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT,
}
// Account for i1/i8/i16 stack passed value hack
static uint64_t getStackValueStoreSizeHack(const CCValAssign &VA) {
static LLT getStackValueStoreTypeHack(const CCValAssign &VA) {
const MVT ValVT = VA.getValVT();
return (ValVT == MVT::i8 || ValVT == MVT::i16) ? ValVT.getStoreSize()
: VA.getLocVT().getStoreSize();
return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT)
: LLT(VA.getLocVT());
}
namespace {
@@ -146,9 +146,9 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
return AddrReg.getReg(0);
}
uint64_t getStackValueStoreSize(const DataLayout &,
const CCValAssign &VA) const override {
return getStackValueStoreSizeHack(VA);
LLT getStackValueStoreType(const DataLayout &,
const CCValAssign &VA) const override {
return getStackValueStoreTypeHack(VA);
}
void assignValueToReg(Register ValVReg, Register PhysReg,
@@ -157,7 +157,7 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t MemSize,
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override {
MachineFunction &MF = MIRBuilder.getMF();
@@ -170,11 +170,9 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16)
std::swap(ValTy, LocTy);
MemSize = LocTy.getSizeInBytes();
auto MMO = MF.getMachineMemOperand(
MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant,
MemSize, inferAlignFromPtrInfo(MF, MPO));
MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, LocTy,
inferAlignFromPtrInfo(MF, MPO));
if (RealRegTy.getSizeInBits() == ValTy.getSizeInBits()) {
// No extension information, or no extension necessary. Load into the
@@ -264,9 +262,9 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
/// we invert the interpretation of ValVT and LocVT in certain cases. This is
/// for compatibility with the DAG call lowering implementation, which we're
/// currently building on top of.
uint64_t getStackValueStoreSize(const DataLayout &,
const CCValAssign &VA) const override {
return getStackValueStoreSizeHack(VA);
LLT getStackValueStoreType(const DataLayout &,
const CCValAssign &VA) const override {
return getStackValueStoreTypeHack(VA);
}
void assignValueToReg(Register ValVReg, Register PhysReg,
@@ -276,18 +274,18 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
MIRBuilder.buildCopy(PhysReg, ExtReg);
}
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override {
MachineFunction &MF = MIRBuilder.getMF();
auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, Size,
auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
inferAlignFromPtrInfo(MF, MPO));
MIRBuilder.buildStore(ValVReg, Addr, *MMO);
}
void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex,
Register Addr, uint64_t MemSize,
MachinePointerInfo &MPO, CCValAssign &VA) override {
unsigned MaxSize = MemSize * 8;
Register Addr, LLT MemTy, MachinePointerInfo &MPO,
CCValAssign &VA) override {
unsigned MaxSize = MemTy.getSizeInBytes() * 8;
// For varargs, we always want to extend them to 8 bytes, in which case
// we disable setting a max.
if (!Arg.IsFixed)
@@ -300,7 +298,7 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) {
std::swap(ValVT, LocVT);
MemSize = VA.getValVT().getStoreSize();
MemTy = LLT(VA.getValVT());
}
ValVReg = extendRegister(ValVReg, VA, MaxSize);
@@ -310,10 +308,10 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
ValVReg = MIRBuilder.buildTrunc(RegTy, ValVReg).getReg(0);
} else {
// The store does not cover the full allocated stack slot.
MemSize = VA.getValVT().getStoreSize();
MemTy = LLT(VA.getValVT());
}
assignValueToAddress(ValVReg, Addr, MemSize, MPO, VA);
assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
}
MachineInstrBuilder MIB;

View File

@@ -54,7 +54,7 @@ struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
llvm_unreachable("not implemented");
}
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override {
llvm_unreachable("not implemented");
}
@@ -122,12 +122,12 @@ struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t MemSize,
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override {
MachineFunction &MF = MIRBuilder.getMF();
auto MMO = MF.getMachineMemOperand(
MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemSize,
MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemTy,
inferAlignFromPtrInfo(MF, MPO));
MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
}
@@ -209,26 +209,25 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
MIRBuilder.buildCopy(PhysReg, ExtReg);
}
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override {
MachineFunction &MF = MIRBuilder.getMF();
uint64_t LocMemOffset = VA.getLocMemOffset();
const auto &ST = MF.getSubtarget<GCNSubtarget>();
auto MMO = MF.getMachineMemOperand(
MPO, MachineMemOperand::MOStore, Size,
commonAlignment(ST.getStackAlignment(), LocMemOffset));
MPO, MachineMemOperand::MOStore, MemTy,
commonAlignment(ST.getStackAlignment(), LocMemOffset));
MIRBuilder.buildStore(ValVReg, Addr, *MMO);
}
void assignValueToAddress(const CallLowering::ArgInfo &Arg,
unsigned ValRegIndex, Register Addr,
uint64_t MemSize, MachinePointerInfo &MPO,
CCValAssign &VA) override {
unsigned ValRegIndex, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override {
Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
? extendRegister(Arg.Regs[ValRegIndex], VA)
: Arg.Regs[ValRegIndex];
assignValueToAddress(ValVReg, Addr, MemSize, MPO, VA);
assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
}
};
}

View File

@@ -121,14 +121,11 @@ struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
MIB.addUse(PhysReg, RegState::Implicit);
}
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override {
assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
"Unsupported size");
Register ExtReg = extendRegister(ValVReg, VA);
auto MMO = MIRBuilder.getMF().getMachineMemOperand(
MPO, MachineMemOperand::MOStore, LLT(VA.getLocVT()), Align(1));
MPO, MachineMemOperand::MOStore, MemTy, Align(1));
MIRBuilder.buildStore(ExtReg, Addr, *MMO);
}
@@ -249,31 +246,28 @@ struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
.getReg(0);
}
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override {
assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
"Unsupported size");
if (VA.getLocInfo() == CCValAssign::SExt ||
VA.getLocInfo() == CCValAssign::ZExt) {
// If the value is zero- or sign-extended, its size becomes 4 bytes, so
// that's what we should load.
Size = 4;
MemTy = LLT::scalar(32);
assert(MRI.getType(ValVReg).isScalar() && "Only scalars supported atm");
auto LoadVReg = buildLoad(LLT::scalar(32), Addr, Size, MPO);
auto LoadVReg = buildLoad(LLT::scalar(32), Addr, MemTy, MPO);
MIRBuilder.buildTrunc(ValVReg, LoadVReg);
} else {
// If the value is not extended, a simple load will suffice.
buildLoad(ValVReg, Addr, Size, MPO);
buildLoad(ValVReg, Addr, MemTy, MPO);
}
}
MachineInstrBuilder buildLoad(const DstOp &Res, Register Addr, uint64_t Size,
MachineInstrBuilder buildLoad(const DstOp &Res, Register Addr, LLT MemTy,
MachinePointerInfo &MPO) {
MachineFunction &MF = MIRBuilder.getMF();
auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, Size,
auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, MemTy,
inferAlignFromPtrInfo(MF, MPO));
return MIRBuilder.buildLoad(Res, Addr, *MMO);
}

View File

@@ -74,11 +74,11 @@ void M68kIncomingValueHandler::assignValueToReg(Register ValVReg,
void M68kIncomingValueHandler::assignValueToAddress(Register ValVReg,
Register Addr,
uint64_t Size,
LLT MemTy,
MachinePointerInfo &MPO,
CCValAssign &VA) {
MachineFunction &MF = MIRBuilder.getMF();
auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, Size,
auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, MemTy,
inferAlignFromPtrInfo(MF, MPO));
MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
}

View File

@@ -54,7 +54,7 @@ private:
void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override;
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override;
Register getStackAddress(uint64_t Size, int64_t Offset,

View File

@@ -86,22 +86,19 @@ void PPCIncomingValueHandler::assignValueToReg(Register ValVReg,
}
void PPCIncomingValueHandler::assignValueToAddress(Register ValVReg,
Register Addr, uint64_t Size,
Register Addr, LLT MemTy,
MachinePointerInfo &MPO,
CCValAssign &VA) {
assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
"Unsupported size");
// define a lambda expression to load value
auto BuildLoad = [](MachineIRBuilder &MIRBuilder, MachinePointerInfo &MPO,
uint64_t Size, const DstOp &Res, Register Addr) {
LLT MemTy, const DstOp &Res, Register Addr) {
MachineFunction &MF = MIRBuilder.getMF();
auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, Size,
auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, MemTy,
inferAlignFromPtrInfo(MF, MPO));
return MIRBuilder.buildLoad(Res, Addr, *MMO);
};
BuildLoad(MIRBuilder, MPO, Size, ValVReg, Addr);
BuildLoad(MIRBuilder, MPO, MemTy, ValVReg, Addr);
}
Register PPCIncomingValueHandler::getStackAddress(uint64_t Size, int64_t Offset,

View File

@@ -48,7 +48,7 @@ private:
void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override;
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override;
Register getStackAddress(uint64_t Size, int64_t Offset,

View File

@@ -111,13 +111,12 @@ struct X86OutgoingValueHandler : public CallLowering::OutgoingValueHandler {
MIRBuilder.buildCopy(PhysReg, ExtReg);
}
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override {
MachineFunction &MF = MIRBuilder.getMF();
Register ExtReg = extendRegister(ValVReg, VA);
auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore,
VA.getLocVT().getStoreSize(),
auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
inferAlignFromPtrInfo(MF, MPO));
MIRBuilder.buildStore(ExtReg, Addr, *MMO);
}
@@ -186,11 +185,11 @@ struct X86IncomingValueHandler : public CallLowering::IncomingValueHandler {
.getReg(0);
}
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override {
MachineFunction &MF = MIRBuilder.getMF();
auto *MMO = MF.getMachineMemOperand(
MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemTy,
inferAlignFromPtrInfo(MF, MPO));
MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
}

View File

@@ -66,7 +66,7 @@ define void @test_outgoing_stack_args([8 x <2 x double>], <4 x half> %arg) {
; DARWIN: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
; DARWIN: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
; DARWIN: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
; DARWIN: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.0, align 16)
; DARWIN: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
; DARWIN: $d0 = COPY [[LOAD]](<4 x s16>)
; DARWIN: TCRETURNdi @outgoing_stack_args_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $d0
; WINDOWS-LABEL: name: test_outgoing_stack_args
@@ -81,7 +81,7 @@ define void @test_outgoing_stack_args([8 x <2 x double>], <4 x half> %arg) {
; WINDOWS: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
; WINDOWS: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
; WINDOWS: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
; WINDOWS: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.0, align 16)
; WINDOWS: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
; WINDOWS: $d0 = COPY [[LOAD]](<4 x s16>)
; WINDOWS: TCRETURNdi @outgoing_stack_args_fn, 0, csr_aarch64_aapcs, implicit $sp, implicit $d0
tail call void @outgoing_stack_args_fn(<4 x half> %arg)
@@ -242,7 +242,7 @@ define void @test_varargs_3([8 x <2 x double>], <4 x half> %arg) {
; DARWIN: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
; DARWIN: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
; DARWIN: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
; DARWIN: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.0, align 16)
; DARWIN: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
; DARWIN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; DARWIN: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
; DARWIN: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
@@ -270,7 +270,7 @@ define void @test_varargs_3([8 x <2 x double>], <4 x half> %arg) {
; WINDOWS: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
; WINDOWS: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
; WINDOWS: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
; WINDOWS: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.0, align 16)
; WINDOWS: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16)
; WINDOWS: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; WINDOWS: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
; WINDOWS: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12

View File

@@ -2459,7 +2459,7 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -2551,10 +2551,10 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
; CHECK: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5)
; CHECK: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5)
; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -2644,10 +2644,10 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 {
; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
; CHECK: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5)
; CHECK: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5)
; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4)

View File

@@ -1293,7 +1293,7 @@ define void @void_func_v65i16(<65 x i16> %arg0) #0 {
; CHECK: [[COPY30:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr30
; CHECK: [[COPY31:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr31
; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5)
; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, align 16, addrspace 5)
; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[COPY31]](<2 x s16>), [[LOAD]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
; CHECK: [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>)
@@ -2079,7 +2079,7 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1
; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s8) from %fixed-stack.3, align 16, addrspace 5)
; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s1) from %fixed-stack.3, align 16, addrspace 5)
; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[LOAD]](s32)
; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s16) from %fixed-stack.2, align 4, addrspace 5)
@@ -2265,9 +2265,9 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2
; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.1, align 16, addrspace 5)
; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, align 16, addrspace 5)
; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; CHECK: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5)
; CHECK: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, addrspace 5)
; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)

View File

@@ -492,7 +492,7 @@ define <8 x i32> @test_split_return_callee(<8 x i32> %arg1, <8 x i32> %arg2) {
; X86: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
; X86: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $xmm2
; X86: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
; X86: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s128) from %fixed-stack.0)
; X86: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s32>) from %fixed-stack.0)
; X86: [[CONCAT_VECTORS1:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY2]](<4 x s32>), [[LOAD]](<4 x s32>)
; X86: ADJCALLSTACKDOWN32 0, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
; X86: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<8 x s32>)
@@ -570,21 +570,21 @@ define void @test_abi_exts_call(i8* %addr) {
; X86: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; X86: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
; X86: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
; X86: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1)
; X86: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p0) :: (store (s8) into stack)
; X86: CALLpcrel32 @take_char, csr_32, implicit $esp, implicit $ssp
; X86: ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
; X86: ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
; X86: [[COPY1:%[0-9]+]]:_(p0) = COPY $esp
; X86: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s32)
; X86: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8)
; X86: G_STORE [[SEXT]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack, align 1)
; X86: G_STORE [[SEXT]](s32), [[PTR_ADD1]](p0) :: (store (s8) into stack)
; X86: CALLpcrel32 @take_char, csr_32, implicit $esp, implicit $ssp
; X86: ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
; X86: ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
; X86: [[COPY2:%[0-9]+]]:_(p0) = COPY $esp
; X86: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s32)
; X86: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
; X86: G_STORE [[ZEXT]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack, align 1)
; X86: G_STORE [[ZEXT]](s32), [[PTR_ADD2]](p0) :: (store (s8) into stack)
; X86: CALLpcrel32 @take_char, csr_32, implicit $esp, implicit $ssp
; X86: ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
; X86: RET 0