From a9ea8a9aae1f47eb9c68c1669aabd3908630518c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Fri, 26 Jul 2019 02:36:05 +0000
Subject: [PATCH] AMDGPU/GlobalISel: Handle most function return types

handleAssignments gives up pretty easily on structs, and i8 values for
some reason.

The other case that doesn't work is when an implicit sret needs to be
inserted if the return size exceeds the number of return registers.

llvm-svn: 367082
---
 llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp |  170 ++-
 llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h   |    5 +
 .../AMDGPU/GlobalISel/function-returns.ll     | 1063 +++++++++++++++++
 .../GlobalISel/irtranslator-amdgpu_vs.ll      |    2 +-
 .../GlobalISel/irtranslator-function-args.ll  |  406 ++++---
 llvm/test/CodeGen/AMDGPU/GlobalISel/ret.ll    |    3 +
 llvm/test/CodeGen/AMDGPU/ret.ll               |   12 +-
 7 files changed, 1487 insertions(+), 174 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/ret.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 30d3903be9cb..d0c99aeca0a6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -30,9 +30,9 @@ using namespace llvm;
 
 namespace {
 
-struct OutgoingArgHandler : public CallLowering::ValueHandler {
-  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
-                     MachineInstrBuilder MIB, CCAssignFn *AssignFn)
+struct OutgoingValueHandler : public CallLowering::ValueHandler {
+  OutgoingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+                       MachineInstrBuilder MIB, CCAssignFn *AssignFn)
       : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
 
   MachineInstrBuilder MIB;
@@ -49,8 +49,16 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
 
   void assignValueToReg(Register ValVReg, Register PhysReg,
                         CCValAssign &VA) override {
-    MIB.addUse(PhysReg);
-    MIRBuilder.buildCopy(PhysReg, ValVReg);
+    Register ExtReg;
+    if (VA.getLocVT().getSizeInBits() < 32) {
+      // 16-bit types are reported as legal for 32-bit registers. We need to
+      // extend and do a 32-bit copy to avoid the verifier complaining about it.
+      ExtReg = MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
+    } else
+      ExtReg = extendRegister(ValVReg, VA);
+
+    MIRBuilder.buildCopy(PhysReg, ExtReg);
+    MIB.addUse(PhysReg, RegState::Implicit);
   }
 
   bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
@@ -193,6 +201,90 @@ void AMDGPUCallLowering::splitToValueTypes(
   }
 }
 
+// Get the appropriate type to make \p OrigTy \p Factor times bigger.
+static LLT getMultipleType(LLT OrigTy, int Factor) {
+  if (OrigTy.isVector()) {
+    return LLT::vector(OrigTy.getNumElements() * Factor,
+                       OrigTy.getElementType());
+  }
+
+  return LLT::scalar(OrigTy.getSizeInBits() * Factor);
+}
+
+// TODO: Move to generic code
+static void unpackRegsToOrigType(MachineIRBuilder &MIRBuilder,
+                                 ArrayRef<Register> DstRegs,
+                                 Register SrcReg,
+                                 LLT SrcTy,
+                                 LLT PartTy) {
+  assert(DstRegs.size() > 1 && "Nothing to unpack");
+
+  MachineFunction &MF = MIRBuilder.getMF();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  const unsigned SrcSize = SrcTy.getSizeInBits();
+  const unsigned PartSize = PartTy.getSizeInBits();
+
+  if (SrcTy.isVector() && !PartTy.isVector() &&
+      PartSize > SrcTy.getElementType().getSizeInBits()) {
+    // Vector was scalarized, and the elements extended.
+    auto UnmergeToEltTy = MIRBuilder.buildUnmerge(SrcTy.getElementType(),
+                                                  SrcReg);
+    for (int i = 0, e = DstRegs.size(); i != e; ++i)
+      MIRBuilder.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
+    return;
+  }
+
+  if (SrcSize % PartSize == 0) {
+    MIRBuilder.buildUnmerge(DstRegs, SrcReg);
+    return;
+  }
+
+  const int NumRoundedParts = (SrcSize + PartSize - 1) / PartSize;
+
+  LLT BigTy = getMultipleType(PartTy, NumRoundedParts);
+  auto ImpDef = MIRBuilder.buildUndef(BigTy);
+
+  Register BigReg = MRI.createGenericVirtualRegister(BigTy);
+  MIRBuilder.buildInsert(BigReg, ImpDef.getReg(0), SrcReg, 0).getReg(0);
+
+  int64_t Offset = 0;
+  for (unsigned i = 0, e = DstRegs.size(); i != e; ++i, Offset += PartSize)
+    MIRBuilder.buildExtract(DstRegs[i], BigReg, Offset);
+}
+
+/// Lower the return value for the already existing \p Ret. This assumes that
+/// \p MIRBuilder's insertion point is correct.
+bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
+                                        const Value *Val, ArrayRef<Register> VRegs,
+                                        MachineInstrBuilder &Ret) const {
+  if (!Val)
+    return true;
+
+  auto &MF = MIRBuilder.getMF();
+  const auto &F = MF.getFunction();
+  const DataLayout &DL = MF.getDataLayout();
+
+  CallingConv::ID CC = F.getCallingConv();
+  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  ArgInfo OrigRetInfo(VRegs, Val->getType());
+  setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
+  SmallVector<ArgInfo, 4> SplitRetInfos;
+
+  splitToValueTypes(
+    OrigRetInfo, SplitRetInfos, DL, MRI, CC,
+    [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
+      unpackRegsToOrigType(MIRBuilder, Regs, VRegs[VTSplitIdx], LLTy, PartLLT);
+    });
+
+  CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
+
+  OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret, AssignFn);
+  return handleAssignments(MIRBuilder, SplitRetInfos, RetHandler);
+}
+
 bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                      const Value *Val,
                                      ArrayRef<Register> VRegs) const {
@@ -202,38 +294,43 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   MFI->setIfReturnsVoid(!Val);
 
-  if (!Val) {
-    MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
+  assert(!Val == VRegs.empty() && "Return value without a vreg");
+
+  CallingConv::ID CC = MIRBuilder.getMF().getFunction().getCallingConv();
+  const bool IsShader = AMDGPU::isShader(CC);
+  const bool IsWaveEnd = (IsShader && MFI->returnsVoid()) ||
+                         AMDGPU::isKernel(CC);
+  if (IsWaveEnd) {
+    MIRBuilder.buildInstr(AMDGPU::S_ENDPGM)
+      .addImm(0);
     return true;
   }
 
-  Register VReg = VRegs[0];
+  auto const &ST = MIRBuilder.getMF().getSubtarget<GCNSubtarget>();
 
-  const Function &F = MF.getFunction();
-  auto &DL = F.getParent()->getDataLayout();
-  if (!AMDGPU::isShader(F.getCallingConv()))
-    return false;
+  unsigned ReturnOpc =
+      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;
 
-
-  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
-  SmallVector<EVT, 4> SplitVTs;
-  SmallVector<uint64_t, 4> Offsets;
-  ArgInfo OrigArg{VReg, Val->getType()};
-  setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
-  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
-
-  SmallVector<ArgInfo, 8> SplitArgs;
-  CCAssignFn *AssignFn = CCAssignFnForReturn(F.getCallingConv(), false);
-  for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
-    Type *SplitTy = SplitVTs[i].getTypeForEVT(F.getContext());
-    SplitArgs.push_back({VRegs[i], SplitTy, OrigArg.Flags, OrigArg.IsFixed});
+  auto Ret = MIRBuilder.buildInstrNoInsert(ReturnOpc);
+  Register ReturnAddrVReg;
+  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
+    ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
+    Ret.addUse(ReturnAddrVReg);
   }
 
-  auto RetInstr = MIRBuilder.buildInstrNoInsert(AMDGPU::SI_RETURN_TO_EPILOG);
-  OutgoingArgHandler Handler(MIRBuilder, MRI, RetInstr, AssignFn);
-  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
-    return false;
-  MIRBuilder.insertInstr(RetInstr);
+  if (!lowerReturnVal(MIRBuilder, Val, VRegs, Ret))
+    return false;
+
+  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
+    const SIRegisterInfo *TRI = ST.getRegisterInfo();
+    Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
+                                         &AMDGPU::SGPR_64RegClass);
+    MIRBuilder.buildCopy(ReturnAddrVReg, LiveInReturn);
+  }
+
+  // TODO: Handle CalleeSavedRegsViaCopy.
+
+  MIRBuilder.insertInstr(Ret);
   return true;
 }
 
@@ -386,6 +483,7 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
   return true;
 }
 
+// TODO: Move this to generic code
 static void packSplitRegsToOrigType(MachineIRBuilder &MIRBuilder,
                                     ArrayRef<Register> OrigRegs,
                                     ArrayRef<Register> Regs,
@@ -476,6 +574,14 @@ bool AMDGPUCallLowering::lowerFormalArguments(
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
 
+  if (!IsEntryFunc) {
+    Register ReturnAddrReg = TRI->getReturnAddressReg(MF);
+    Register LiveInReturn = MF.addLiveIn(ReturnAddrReg,
+                                         &AMDGPU::SGPR_64RegClass);
+    MBB.addLiveIn(ReturnAddrReg);
+    MIRBuilder.buildCopy(LiveInReturn, ReturnAddrReg);
+  }
+
   if (Info->hasImplicitBufferPtr()) {
     Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
     MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
@@ -497,9 +603,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(
     if (!IsShader && InReg)
       return false;
 
-    // TODO: Handle sret.
-    if (Arg.hasAttribute(Attribute::StructRet) ||
-        Arg.hasAttribute(Attribute::SwiftSelf) ||
+    if (Arg.hasAttribute(Attribute::SwiftSelf) ||
         Arg.hasAttribute(Attribute::SwiftError) ||
         Arg.hasAttribute(Attribute::Nest))
       return false;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
index bc345b6c659b..1898c7a4c88e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -20,6 +20,7 @@
 namespace llvm {
 
 class AMDGPUTargetLowering;
+class MachineInstrBuilder;
 
 class AMDGPUCallLowering: public CallLowering {
   Register lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
@@ -38,6 +39,10 @@ class AMDGPUCallLowering: public CallLowering {
                          CallingConv::ID CallConv,
                          SplitArgTy SplitArg) const;
 
+  bool lowerReturnVal(MachineIRBuilder &MIRBuilder,
+                      const Value *Val, ArrayRef<Register> VRegs,
+                      MachineInstrBuilder &Ret) const;
+
 public:
   AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
new file mode 100644
index 000000000000..fcf5deff710a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
@@ -0,0 +1,1063 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -global-isel -global-isel-abort=0 -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s
+
+define i1 @i1_func_void() #0 {
+  ; CHECK-LABEL: name: i1_func_void
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load 1 from `i1 addrspace(1)* undef`, addrspace 1)
+  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1)
+  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0
+  %val = load i1, i1 addrspace(1)* undef
+  ret i1 %val
+}
+
+define zeroext i1 @i1_zeroext_func_void() #0 {
+  ; CHECK-LABEL: name: i1_zeroext_func_void
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load 1 from `i1 addrspace(1)* undef`, addrspace 1)
+  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1)
+  ; CHECK:   $vgpr0 = COPY [[ZEXT]](s32)
+  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0
+  %val = load i1, i1 addrspace(1)* undef
+  ret i1 %val
+}
+
+define signext i1 @i1_signext_func_void() #0 {
+  ; CHECK-LABEL: name: i1_signext_func_void
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load 1 from `i1 addrspace(1)* undef`, addrspace 1)
+  ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1)
+  ; CHECK:   $vgpr0 = COPY [[SEXT]](s32)
+  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0
+  %val = load i1, i1 addrspace(1)* undef
+  ret i1 %val
+}
+
+define i8 @i8_func_void() #0 {
+  ; CHECK-LABEL: name: i8_func_void
+  ; CHECK: bb.0:
+  ; CHECK:   successors: 
%bb.1(0x80000000) + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `i8 addrspace(1)* undef`, addrspace 1) + %val = load i8, i8 addrspace(1)* undef + ret i8 %val +} + +define zeroext i8 @i8_zeroext_func_void() #0 { + ; CHECK-LABEL: name: i8_zeroext_func_void + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `i8 addrspace(1)* undef`, addrspace 1) + %val = load i8, i8 addrspace(1)* undef + ret i8 %val +} + +define signext i8 @i8_signext_func_void() #0 { + ; CHECK-LABEL: name: i8_signext_func_void + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `i8 addrspace(1)* undef`, addrspace 1) + %val = load i8, i8 addrspace(1)* undef + ret i8 %val +} + +define i16 @i16_func_void() #0 { + ; CHECK-LABEL: name: i16_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load 2 from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + %val = load i16, i16 addrspace(1)* undef + ret i16 %val +} + +define zeroext i16 @i16_zeroext_func_void() #0 { + ; CHECK-LABEL: name: i16_zeroext_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load 2 from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) + ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + %val = load i16, i16 addrspace(1)* undef + ret i16 %val +} + +define signext i16 @i16_signext_func_void() #0 { + ; CHECK-LABEL: name: i16_signext_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load 2 from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) + ; CHECK: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + %val = load i16, i16 addrspace(1)* undef + ret i16 %val +} + +define i32 @i32_func_void() #0 { + ; CHECK-LABEL: name: i32_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load 4 from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + %val = load i32, i32 addrspace(1)* undef + ret i32 %val +} + +define i48 @i48_func_void() #0 { + ; CHECK-LABEL: name: i48_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[DEF1]], [[LOAD]](s48), 0 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s64), 0 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s64), 32 + ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK: $vgpr1 = COPY [[EXTRACT1]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + %val = load i48, i48 addrspace(1)* undef, align 8 + ret i48 %val +} + +define i64 @i64_func_void() #0 { + ; CHECK-LABEL: name: i64_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load 8 from `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + %val = load i64, i64 addrspace(1)* undef + ret i64 %val +} + +define i65 @i65_func_void() #0 { + ; CHECK-LABEL: name: i65_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load 9 from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[DEF1:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[LOAD]](s65), 0 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s96), 0 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s96), 32 + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s96), 64 + ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK: $vgpr1 = COPY [[EXTRACT1]](s32) + ; CHECK: $vgpr2 = COPY [[EXTRACT2]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %val = load i65, i65 addrspace(1)* undef + ret i65 %val +} + +define float @f32_func_void() #0 { + ; CHECK-LABEL: name: f32_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load 4 from `float addrspace(1)* undef`, addrspace 1) + ; CHECK: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + %val = load float, float addrspace(1)* 
undef + ret float %val +} + +define double @f64_func_void() #0 { + ; CHECK-LABEL: name: f64_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load 8 from `double addrspace(1)* undef`, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + %val = load double, double addrspace(1)* undef + ret double %val +} + +define <2 x double> @v2f64_func_void() #0 { + ; CHECK-LABEL: name: v2f64_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load 16 from `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + %val = load <2 x double>, <2 x double> addrspace(1)* undef + ret <2 x double> %val +} + +define <2 x i32> @v2i32_func_void() #0 { + ; CHECK-LABEL: name: v2i32_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load 8 from `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + %val = load <2 x i32>, <2 x i32> addrspace(1)* undef + ret <2 x i32> %val +} + +define <3 x i32> @v3i32_func_void() #0 { + ; CHECK-LABEL: name: v3i32_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load 12 from `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %val = load <3 x i32>, <3 x i32> addrspace(1)* undef + ret <3 x i32> %val +} + +define <4 x i32> @v4i32_func_void() #0 { + ; CHECK-LABEL: name: v4i32_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = 
G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load 16 from `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + %val = load <4 x i32>, <4 x i32> addrspace(1)* undef + ret <4 x i32> %val +} + +define <5 x i32> @v5i32_func_void() #0 { + ; CHECK-LABEL: name: v5i32_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load 20 from `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 + %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef + ret <5 x i32> %val +} + +define <8 x i32> @v8i32_func_void() #0 { + ; CHECK-LABEL: name: v8i32_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load 32 from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: $vgpr6 = COPY [[UV6]](s32) + ; CHECK: $vgpr7 = COPY [[UV7]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef + %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr + ret <8 x i32> %val +} + +define <16 x i32> @v16i32_func_void() #0 { + ; CHECK-LABEL: name: v16i32_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) 
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load 64 from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: $vgpr6 = COPY [[UV6]](s32) + ; CHECK: $vgpr7 = COPY [[UV7]](s32) + ; CHECK: $vgpr8 = COPY [[UV8]](s32) + ; CHECK: $vgpr9 = COPY [[UV9]](s32) + ; CHECK: $vgpr10 = COPY [[UV10]](s32) + ; CHECK: $vgpr11 = COPY [[UV11]](s32) + ; CHECK: $vgpr12 = COPY [[UV12]](s32) + ; CHECK: $vgpr13 = COPY [[UV13]](s32) + ; CHECK: $vgpr14 = COPY [[UV14]](s32) + ; CHECK: $vgpr15 = COPY [[UV15]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef + %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr + ret <16 x i32> %val +} + +define <32 x i32> @v32i32_func_void() #0 { + ; CHECK-LABEL: name: v32i32_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load 128 from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: $vgpr6 = COPY [[UV6]](s32) + ; CHECK: $vgpr7 = COPY [[UV7]](s32) + ; CHECK: $vgpr8 = COPY [[UV8]](s32) + ; CHECK: $vgpr9 = COPY [[UV9]](s32) + ; CHECK: $vgpr10 = COPY [[UV10]](s32) + ; CHECK: $vgpr11 = COPY [[UV11]](s32) + ; CHECK: $vgpr12 = COPY 
[[UV12]](s32) + ; CHECK: $vgpr13 = COPY [[UV13]](s32) + ; CHECK: $vgpr14 = COPY [[UV14]](s32) + ; CHECK: $vgpr15 = COPY [[UV15]](s32) + ; CHECK: $vgpr16 = COPY [[UV16]](s32) + ; CHECK: $vgpr17 = COPY [[UV17]](s32) + ; CHECK: $vgpr18 = COPY [[UV18]](s32) + ; CHECK: $vgpr19 = COPY [[UV19]](s32) + ; CHECK: $vgpr20 = COPY [[UV20]](s32) + ; CHECK: $vgpr21 = COPY [[UV21]](s32) + ; CHECK: $vgpr22 = COPY [[UV22]](s32) + ; CHECK: $vgpr23 = COPY [[UV23]](s32) + ; CHECK: $vgpr24 = COPY [[UV24]](s32) + ; CHECK: $vgpr25 = COPY [[UV25]](s32) + ; CHECK: $vgpr26 = COPY [[UV26]](s32) + ; CHECK: $vgpr27 = COPY [[UV27]](s32) + ; CHECK: $vgpr28 = COPY [[UV28]](s32) + ; CHECK: $vgpr29 = COPY [[UV29]](s32) + ; CHECK: $vgpr30 = COPY [[UV30]](s32) + ; CHECK: $vgpr31 = COPY [[UV31]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef + %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr + ret <32 x i32> %val +} + +define <2 x i64> @v2i64_func_void() #0 { + ; CHECK-LABEL: name: v2i64_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load 16 from `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + %val = load <2 x i64>, <2 x i64> addrspace(1)* undef + ret <2 x i64> %val +} + +define <3 x i64> @v3i64_func_void() #0 { + ; CHECK-LABEL: name: v3i64_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load 24 from %ir.ptr, align 32, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return 
[[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef + %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr + ret <3 x i64> %val +} + +define <4 x i64> @v4i64_func_void() #0 { + ; CHECK-LABEL: name: v4i64_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load 32 from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: $vgpr6 = COPY [[UV6]](s32) + ; CHECK: $vgpr7 = COPY [[UV7]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef + %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr + ret <4 x i64> %val +} + +define <5 x i64> @v5i64_func_void() #0 { + ; CHECK-LABEL: name: v5i64_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load 40 from %ir.ptr, align 64, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: $vgpr6 = COPY [[UV6]](s32) + ; CHECK: $vgpr7 = COPY [[UV7]](s32) + ; CHECK: $vgpr8 = COPY [[UV8]](s32) + ; CHECK: $vgpr9 = COPY [[UV9]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 + %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef + %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr + ret <5 x i64> %val +} + +define <8 x i64> @v8i64_func_void() #0 { + ; CHECK-LABEL: name: v8i64_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; 
CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load 64 from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: $vgpr6 = COPY [[UV6]](s32) + ; CHECK: $vgpr7 = COPY [[UV7]](s32) + ; CHECK: $vgpr8 = COPY [[UV8]](s32) + ; CHECK: $vgpr9 = COPY [[UV9]](s32) + ; CHECK: $vgpr10 = COPY [[UV10]](s32) + ; CHECK: $vgpr11 = COPY [[UV11]](s32) + ; CHECK: $vgpr12 = COPY [[UV12]](s32) + ; CHECK: $vgpr13 = COPY [[UV13]](s32) + ; CHECK: $vgpr14 = COPY [[UV14]](s32) + ; CHECK: $vgpr15 = COPY [[UV15]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef + %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr + ret <8 x i64> %val +} + +define <16 x i64> @v16i64_func_void() #0 { + ; CHECK-LABEL: name: v16i64_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load 128 from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: $vgpr6 = COPY [[UV6]](s32) + ; CHECK: $vgpr7 = COPY [[UV7]](s32) + ; CHECK: 
$vgpr8 = COPY [[UV8]](s32) + ; CHECK: $vgpr9 = COPY [[UV9]](s32) + ; CHECK: $vgpr10 = COPY [[UV10]](s32) + ; CHECK: $vgpr11 = COPY [[UV11]](s32) + ; CHECK: $vgpr12 = COPY [[UV12]](s32) + ; CHECK: $vgpr13 = COPY [[UV13]](s32) + ; CHECK: $vgpr14 = COPY [[UV14]](s32) + ; CHECK: $vgpr15 = COPY [[UV15]](s32) + ; CHECK: $vgpr16 = COPY [[UV16]](s32) + ; CHECK: $vgpr17 = COPY [[UV17]](s32) + ; CHECK: $vgpr18 = COPY [[UV18]](s32) + ; CHECK: $vgpr19 = COPY [[UV19]](s32) + ; CHECK: $vgpr20 = COPY [[UV20]](s32) + ; CHECK: $vgpr21 = COPY [[UV21]](s32) + ; CHECK: $vgpr22 = COPY [[UV22]](s32) + ; CHECK: $vgpr23 = COPY [[UV23]](s32) + ; CHECK: $vgpr24 = COPY [[UV24]](s32) + ; CHECK: $vgpr25 = COPY [[UV25]](s32) + ; CHECK: $vgpr26 = COPY [[UV26]](s32) + ; CHECK: $vgpr27 = COPY [[UV27]](s32) + ; CHECK: $vgpr28 = COPY [[UV28]](s32) + ; CHECK: $vgpr29 = COPY [[UV29]](s32) + ; CHECK: $vgpr30 = COPY [[UV30]](s32) + ; CHECK: $vgpr31 = COPY [[UV31]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef + %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr + ret <16 x i64> %val +} + +define <2 x i16> @v2i16_func_void() #0 { + ; CHECK-LABEL: name: v2i16_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load 4 from `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<2 x s16>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + %val = load <2 x i16>, <2 x i16> addrspace(1)* undef + ret <2 x i16> %val +} + +define <3 x i16> @v3i16_func_void() #0 { + ; CHECK-LABEL: name: v3i16_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load 6 from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; 
CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %val = load <3 x i16>, <3 x i16> addrspace(1)* undef + ret <3 x i16> %val +} + +define <4 x i16> @v4i16_func_void() #0 { + ; CHECK-LABEL: name: v4i16_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load 8 from `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK: $vgpr3 = COPY [[ANYEXT3]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + %val = load <4 x i16>, <4 x i16> addrspace(1)* undef + ret <4 x i16> %val +} + +define <4 x half> @v4f16_func_void() #0 { + ; CHECK-LABEL: name: v4f16_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load 8 from `<4 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK: $vgpr3 = COPY [[ANYEXT3]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + %val = load <4 x half>, <4 x half> addrspace(1)* undef + ret <4 x half> %val +} + +define <5 x i16> @v5i16_func_void() #0 { + ; CHECK-LABEL: name: v5i16_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load 10 from %ir.ptr, align 16, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD1]](<5 x s16>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) + ; CHECK: $vgpr0 = COPY 
[[ANYEXT]](s32) + ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK: $vgpr3 = COPY [[ANYEXT3]](s32) + ; CHECK: $vgpr4 = COPY [[ANYEXT4]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 + %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef + %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr + ret <5 x i16> %val +} + +define <8 x i16> @v8i16_func_void() #0 { + ; CHECK-LABEL: name: v8i16_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<8 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load 16 from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK: $vgpr3 = COPY [[ANYEXT3]](s32) + ; CHECK: $vgpr4 = COPY [[ANYEXT4]](s32) + ; CHECK: $vgpr5 = COPY [[ANYEXT5]](s32) + ; CHECK: $vgpr6 = COPY [[ANYEXT6]](s32) + ; CHECK: $vgpr7 = COPY [[ANYEXT7]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef + %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + ret <8 x i16> %val +} + +define <16 x i16> @v16i16_func_void() #0 { + ; CHECK-LABEL: name: v16i16_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load 32 from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = 
G_ANYEXT [[UV1]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s16) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[UV12]](s16) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[UV13]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[UV14]](s16) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[UV15]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK: $vgpr3 = COPY [[ANYEXT3]](s32) + ; CHECK: $vgpr4 = COPY [[ANYEXT4]](s32) + ; CHECK: $vgpr5 = COPY [[ANYEXT5]](s32) + ; CHECK: $vgpr6 = COPY [[ANYEXT6]](s32) + ; CHECK: $vgpr7 = COPY [[ANYEXT7]](s32) + ; CHECK: $vgpr8 = COPY [[ANYEXT8]](s32) + ; CHECK: $vgpr9 = COPY [[ANYEXT9]](s32) + ; CHECK: $vgpr10 = COPY [[ANYEXT10]](s32) + ; CHECK: $vgpr11 = COPY [[ANYEXT11]](s32) + ; CHECK: $vgpr12 = COPY [[ANYEXT12]](s32) + ; CHECK: $vgpr13 = COPY [[ANYEXT13]](s32) + ; CHECK: $vgpr14 = COPY [[ANYEXT14]](s32) + ; CHECK: $vgpr15 = COPY [[ANYEXT15]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef + %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr + ret <16 x i16> %val +} + +define <16 x i8> @v16i8_func_void() #0 { + ; CHECK-LABEL: name: v16i8_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load 16 from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: 
[[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[UV12]](s8) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[UV13]](s8) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[UV14]](s8) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[UV15]](s8) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK: $vgpr3 = COPY [[ANYEXT3]](s32) + ; CHECK: $vgpr4 = COPY [[ANYEXT4]](s32) + ; CHECK: $vgpr5 = COPY [[ANYEXT5]](s32) + ; CHECK: $vgpr6 = COPY [[ANYEXT6]](s32) + ; CHECK: $vgpr7 = COPY [[ANYEXT7]](s32) + ; CHECK: $vgpr8 = COPY [[ANYEXT8]](s32) + ; CHECK: $vgpr9 = COPY [[ANYEXT9]](s32) + ; CHECK: $vgpr10 = COPY [[ANYEXT10]](s32) + ; CHECK: $vgpr11 = COPY [[ANYEXT11]](s32) + ; CHECK: $vgpr12 = COPY [[ANYEXT12]](s32) + ; CHECK: $vgpr13 = COPY [[ANYEXT13]](s32) + ; CHECK: $vgpr14 = COPY [[ANYEXT14]](s32) + ; CHECK: $vgpr15 = COPY [[ANYEXT15]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef + %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + ret <16 x i8> %val +} + +define <4 x i8> @v4i8_func_void() #0 { + ; CHECK-LABEL: name: v4i8_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load 4 from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK: $vgpr3 = COPY [[ANYEXT3]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef + %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + ret <4 x i8> %val +} + +define {i8, i32} @struct_i8_i32_func_void() #0 { + ; CHECK-LABEL: name: struct_i8_i32_func_void + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; 
CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[DEF]], [[C]](s64)
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 4 from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
+ %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef
+ ret { i8, i32 } %val
+}
+
+define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_sret_struct_i8_i32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load 1 from `i8 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: (volatile load 4 from `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; CHECK: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32)
+ ; CHECK: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store 1 into %ir.gep01, addrspace 5)
+ ; CHECK: G_STORE [[LOAD1]](s32), [[GEP]](p5) :: (store 4 into %ir.gep1, addrspace 5)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]]
+ %val0 = load volatile i8, i8 addrspace(1)* undef
+ %val1 = load volatile i32, i32 addrspace(1)* undef
+ %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
+ %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
+ store i8 %val0, i8 addrspace(5)* %gep0
+ store i32 %val1, i32 addrspace(5)* %gep1
+ ret void
+}
+
+; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call
+; lowering introduces an extra CopyToReg/CopyFromReg obscuring the
+; AssertZext inserted. Not using it introduces the spills.
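+
+; As a rough, hypothetical sketch (not exercised by any checks in this
+; file): a return type that does not fit in the available return
+; registers can always be passed through an explicit sret pointer
+; instead, in the same way void_func_sret_struct_i8_i32 above handles
+; the small struct. The function name and body below are illustrative
+; only:
+;
+;   define void @v33i32_sret_sketch(<33 x i32> addrspace(5)* sret %out) {
+;     %val = load <33 x i32>, <33 x i32> addrspace(1)* undef
+;     store <33 x i32> %val, <33 x i32> addrspace(5)* %out
+;     ret void
+;   }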
+
+define <33 x i32> @v33i32_func_void() #0 {
+ ; CHECK-LABEL: name: v33i32_func_void
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: liveins: $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load 132 from %ir.ptr, align 256, addrspace 1)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<33 x s32>)
+ %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef
+ %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr
+ ret <33 x i32> %val
+}
+
+define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
+ ; CHECK-LABEL: name: struct_v32i32_i32_func_void
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: liveins: $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4)
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load 128 from %ir.ptr, addrspace 1)
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
+ ; CHECK: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[LOAD]], [[C]](s64)
+ ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 4 from %ir.ptr + 128, align 128, addrspace 1)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
+ %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef
+ %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr
+ ret { <32 x i32>, i32 } %val
+}
+
+define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
+ ; CHECK-LABEL: name: struct_i32_v32i32_func_void
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: liveins: $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4)
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load 4 from %ir.ptr, align 128, addrspace 1)
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
+ ; CHECK: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[LOAD]], [[C]](s64)
+ ; CHECK: [[LOAD2:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[GEP]](p1) :: (load 128 from %ir.ptr + 128, addrspace 1)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<32 x s32>)
+ %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef
+ %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr
+ ret { i32, <32 x i32> } %val
+}
+
+; Make sure the last struct component is returned in v3, not v4.
+define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { + ; CHECK-LABEL: name: v3i32_struct_func_void_wasted_reg + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) + ; CHECK: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) + ; CHECK: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + %load0 = load volatile i32, i32 addrspace(3)* undef + %load1 = load volatile i32, i32 addrspace(3)* undef + %load2 = load volatile i32, i32 addrspace(3)* undef + %load3 = load volatile i32, i32 addrspace(3)* undef + + %insert.0 = insertelement <3 x i32> undef, i32 %load0, i32 0 + %insert.1 = insertelement <3 x i32> %insert.0, i32 %load1, i32 1 + %insert.2 = insertelement <3 x i32> %insert.1, i32 %load2, i32 2 + %insert.3 = insertvalue { <3 x i32>, i32 } undef, <3 x i32> %insert.2, 0 + %insert.4 = insertvalue { <3 x i32>, i32 } %insert.3, i32 %load3, 1 + ret { <3 x i32>, i32 } %insert.4 +} + +define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { + ; CHECK-LABEL: name: v3f32_struct_func_void_wasted_reg + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `float addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `float addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `float addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p3) :: (volatile load 4 from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: [[IVEC:%[0-9]+]]:_(<3 x s32>) = 
G_INSERT_VECTOR_ELT [[DEF2]], [[LOAD]](s32), [[C]](s32) + ; CHECK: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) + ; CHECK: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + %load0 = load volatile float, float addrspace(3)* undef + %load1 = load volatile float, float addrspace(3)* undef + %load2 = load volatile float, float addrspace(3)* undef + %load3 = load volatile i32, i32 addrspace(3)* undef + + %insert.0 = insertelement <3 x float> undef, float %load0, i32 0 + %insert.1 = insertelement <3 x float> %insert.0, float %load1, i32 1 + %insert.2 = insertelement <3 x float> %insert.1, float %load2, i32 2 + %insert.3 = insertvalue { <3 x float>, i32 } undef, <3 x float> %insert.2, 0 + %insert.4 = insertvalue { <3 x float>, i32 } %insert.3, i32 %load3, 1 + ret { <3 x float>, i32 } %insert.4 +} + +define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret %arg0) #0 { + ; CHECK-LABEL: name: void_func_sret_max_known_zero_bits + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) + ; CHECK: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[LSHR2]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] + %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32 + + %lshr0 = lshr i32 %arg0.int, 16 + %lshr1 = lshr i32 %arg0.int, 17 + %lshr2 = lshr i32 %arg0.int, 18 + + store volatile i32 %lshr0, i32 addrspace(3)* undef + store volatile i32 %lshr1, i32 addrspace(3)* undef + store volatile i32 %lshr2, i32 addrspace(3)* undef + ret void +} + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll index 7b36a46ca0db..5f90340fbd60 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll @@ -56,7 +56,7 @@ define amdgpu_vs void @test_order(float inreg %arg0, float inreg %arg1, float %a ; CHECK: [[S1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; CHECK: $sgpr0 = COPY [[S0]] ; CHECK: $sgpr1 = COPY [[S1]] -; CHECK: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 +; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, 
implicit $sgpr1 define amdgpu_vs <{ i32, i32 }> @ret_struct(i32 inreg %arg0, i32 inreg %arg1) { main_body: %tmp0 = insertvalue <{ i32, i32 }> undef, i32 %arg0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll index dcee8eaf7990..b4bdb22153d8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -4,12 +4,14 @@ define void @void_func_i1(i1 %arg0) #0 { ; CHECK-LABEL: name: void_func_i1 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store 1 into `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] store i1 %arg0, i1 addrspace(1)* undef ret void } @@ -17,15 +19,17 @@ define void @void_func_i1(i1 %arg0) #0 { define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i1_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] %ext = zext i1 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -35,15 +39,17 @@ define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 { define void @void_func_i1_signext(i1 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i1_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] %ext = sext i1 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -54,9 +60,10 @@ define void @i1_arg_i1_use(i1 %arg) #0 { ; CHECK-LABEL: name: i1_arg_i1_use ; CHECK: bb.1.bb: ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; CHECK: [[C1:%[0-9]+]]:_(s32) 
= G_CONSTANT i32 0 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -69,7 +76,8 @@ define void @i1_arg_i1_use(i1 %arg) #0 { ; CHECK: G_STORE [[C1]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: bb.3.bb2: ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s64) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] bb: br i1 %arg, label %bb2, label %bb1 @@ -84,12 +92,14 @@ bb2: define void @void_func_i8(i8 %arg0) #0 { ; CHECK-LABEL: name: void_func_i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[TRUNC]](s8), [[DEF]](p1) :: (store 1 into `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] store i8 %arg0, i8 addrspace(1)* undef ret void } @@ -97,15 +107,17 @@ define void @void_func_i8(i8 %arg0) #0 { define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i8_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] %ext = zext i8 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -115,15 +127,17 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { define void @void_func_i8_signext(i8 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i8_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] %ext = sext i8 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -133,12 +147,14 @@ define void @void_func_i8_signext(i8 signext %arg0) #0 { define void @void_func_i16(i16 %arg0) #0 { ; CHECK-LABEL: name: void_func_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; 
CHECK: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store 2 into `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] store i16 %arg0, i16 addrspace(1)* undef ret void } @@ -146,15 +162,17 @@ define void @void_func_i16(i16 %arg0) #0 { define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i16_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] %ext = zext i16 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -164,15 +182,17 @@ define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { define void @void_func_i16_signext(i16 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i16_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] %ext = sext i16 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -182,11 +202,13 @@ define void @void_func_i16_signext(i16 signext %arg0) #0 { define void @void_func_i32(i32 %arg0) #0 { ; CHECK-LABEL: name: void_func_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] store i32 %arg0, i32 addrspace(1)* undef ret void } @@ -194,13 +216,15 @@ define void @void_func_i32(i32 %arg0) #0 { define void @void_func_i64(i64 %arg0) #0 { ; CHECK-LABEL: name: void_func_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[MV]](s64), [[DEF]](p1) :: (store 8 into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: 
[[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]] store i64 %arg0, i64 addrspace(1)* undef ret void } @@ -208,12 +232,14 @@ define void @void_func_i64(i64 %arg0) #0 { define void @void_func_f16(half %arg0) #0 { ; CHECK-LABEL: name: void_func_f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store 2 into `half addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] store half %arg0, half addrspace(1)* undef ret void } @@ -221,11 +247,13 @@ define void @void_func_f16(half %arg0) #0 { define void @void_func_f32(float %arg0) #0 { ; CHECK-LABEL: name: void_func_f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `float addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] store float %arg0, float addrspace(1)* undef ret void } @@ -233,13 +261,15 @@ define void @void_func_f32(float %arg0) #0 { define void @void_func_f64(double %arg0) #0 { ; CHECK-LABEL: name: void_func_f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[MV]](s64), [[DEF]](p1) :: (store 8 into `double addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]] store double %arg0, double addrspace(1)* undef ret void } @@ -247,13 +277,15 @@ define void @void_func_f64(double %arg0) #0 { define void @void_func_v2i32(<2 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store 8 into `<2 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]] store <2 x i32> %arg0, <2 x i32> addrspace(1)* undef ret void } @@ -261,14 +293,16 @@ define void @void_func_v2i32(<2 x i32> %arg0) #0 { define void @void_func_v3i32(<3 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store 12 into `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK: S_SETPC_B64_return [[COPY4]] store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef ret void } @@ -276,15 +310,17 @@ define void @void_func_v3i32(<3 x i32> %arg0) #0 { define void @void_func_v4i32(<4 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]] store <4 x i32> %arg0, <4 x i32> addrspace(1)* undef ret void } @@ -292,16 +328,18 @@ define void @void_func_v4i32(<4 x i32> %arg0) #0 { define void @void_func_v5i32(<5 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v5i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store 20 into `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]] + ; CHECK: S_SETPC_B64_return [[COPY6]] store <5 x i32> %arg0, <5 x i32> addrspace(1)* undef ret void } @@ -309,7 +347,7 @@ define void @void_func_v5i32(<5 x i32> %arg0) #0 { define void @void_func_v8i32(<8 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -318,10 +356,12 @@ define void @void_func_v8i32(<8 x i32> %arg0) #0 { ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = 
COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store 32 into `<8 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK: S_SETPC_B64_return [[COPY9]] store <8 x i32> %arg0, <8 x i32> addrspace(1)* undef ret void } @@ -329,7 +369,7 @@ define void @void_func_v8i32(<8 x i32> %arg0) #0 { define void @void_func_v16i32(<16 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -346,10 +386,12 @@ define void @void_func_v16i32(<16 x i32> %arg0) #0 { ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store 64 into `<16 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] + ; CHECK: S_SETPC_B64_return [[COPY17]] store <16 x i32> %arg0, <16 x i32> addrspace(1)* undef ret void } @@ -357,7 +399,7 @@ define void @void_func_v16i32(<16 x i32> %arg0) #0 { define void @void_func_v32i32(<32 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v32i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -390,10 +432,12 @@ define void @void_func_v32i32(<32 x i32> %arg0) #0 { ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] + ; CHECK: S_SETPC_B64_return [[COPY33]] store <32 x i32> %arg0, <32 x i32> addrspace(1)* undef ret void } @@ -402,7 +446,7 @@ define void @void_func_v32i32(<32 x i32> %arg0) #0 { define void @void_func_v33i32(<33 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v33i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -437,10 +481,12 @@ define void @void_func_v33i32(<33 x i32> %arg0) #0 { ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[LOAD]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store 132 into `<33 x i32> addrspace(1)* undef`, align 256, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] + ; CHECK: S_SETPC_B64_return [[COPY33]] store <33 x i32> %arg0, <33 x i32> addrspace(1)* undef ret void } @@ -448,17 +494,19 @@ define void @void_func_v33i32(<33 x i32> %arg0) #0 { define void @void_func_v2i64(<2 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; 
CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<2 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]] store <2 x i64> %arg0, <2 x i64> addrspace(1)* undef ret void } @@ -466,20 +514,22 @@ define void @void_func_v2i64(<2 x i64> %arg0) #0 { define void @void_func_v3i64(<3 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store 24 into `<3 x i64> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; CHECK: S_SETPC_B64_return [[COPY7]] store <3 x i64> %arg0, <3 x i64> addrspace(1)* undef ret void } @@ -487,7 +537,7 @@ define void @void_func_v3i64(<3 x i64> %arg0) #0 { define void @void_func_v4i64(<4 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -496,6 +546,7 @@ define void @void_func_v4i64(<4 x i64> %arg0) #0 { ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) @@ -503,7 +554,8 @@ define void @void_func_v4i64(<4 x i64> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store 32 into `<4 x i64> 
addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK: S_SETPC_B64_return [[COPY9]] store <4 x i64> %arg0, <4 x i64> addrspace(1)* undef ret void } @@ -511,7 +563,7 @@ define void @void_func_v4i64(<4 x i64> %arg0) #0 { define void @void_func_v5i64(<5 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v5i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -522,6 +574,7 @@ define void @void_func_v5i64(<5 x i64> %arg0) #0 { ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) @@ -530,7 +583,8 @@ define void @void_func_v5i64(<5 x i64> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store 40 into `<5 x i64> addrspace(1)* undef`, align 64, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] + ; CHECK: S_SETPC_B64_return [[COPY11]] store <5 x i64> %arg0, <5 x i64> addrspace(1)* undef ret void } @@ -538,7 +592,7 @@ define void @void_func_v5i64(<5 x i64> %arg0) #0 { define void @void_func_v8i64(<8 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -555,6 +609,7 @@ define void @void_func_v8i64(<8 x i64> %arg0) #0 { ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) @@ -566,7 +621,8 @@ define void @void_func_v8i64(<8 x i64> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store 64 into `<8 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] + ; CHECK: S_SETPC_B64_return 
[[COPY17]] store <8 x i64> %arg0, <8 x i64> addrspace(1)* undef ret void } @@ -574,7 +630,7 @@ define void @void_func_v8i64(<8 x i64> %arg0) #0 { define void @void_func_v16i64(<16 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -607,6 +663,7 @@ define void @void_func_v16i64(<16 x i64> %arg0) #0 { ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) @@ -626,7 +683,8 @@ define void @void_func_v16i64(<16 x i64> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store 128 into `<16 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] + ; CHECK: S_SETPC_B64_return [[COPY33]] store <16 x i64> %arg0, <16 x i64> addrspace(1)* undef ret void } @@ -634,14 +692,16 @@ define void @void_func_v16i64(<16 x i64> %arg0) #0 { define void @void_func_v2i16(<2 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[TRUNC]](<2 x s16>), [[DEF]](p1) :: (store 4 into `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]] store <2 x i16> %arg0, <2 x i16> addrspace(1)* undef ret void } @@ -649,15 +709,17 @@ define void @void_func_v2i16(<2 x i16> %arg0) #0 { define void @void_func_v3i16(<3 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY 
$vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[TRUNC]](<3 x s16>), [[DEF]](p1) :: (store 6 into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK: S_SETPC_B64_return [[COPY4]] store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef ret void } @@ -665,16 +727,18 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 { define void @void_func_v4i16(<4 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[TRUNC]](<4 x s16>), [[DEF]](p1) :: (store 8 into `<4 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]] store <4 x i16> %arg0, <4 x i16> addrspace(1)* undef ret void } @@ -682,17 +746,19 @@ define void @void_func_v4i16(<4 x i16> %arg0) #0 { define void @void_func_v5i16(<5 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v5i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(<5 x s16>) = G_TRUNC [[BUILD_VECTOR]](<5 x s32>) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[TRUNC]](<5 x s16>), [[DEF]](p1) :: (store 10 into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]] + ; CHECK: S_SETPC_B64_return [[COPY6]] store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef ret void } @@ -700,7 +766,7 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 { define void @void_func_v8i16(<8 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -709,11 +775,13 @@ define 
void @void_func_v8i16(<8 x i16> %arg0) #0 { ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[TRUNC]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<8 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK: S_SETPC_B64_return [[COPY9]] store <8 x i16> %arg0, <8 x i16> addrspace(1)* undef ret void } @@ -721,7 +789,7 @@ define void @void_func_v8i16(<8 x i16> %arg0) #0 { define void @void_func_v16i16(<16 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -738,11 +806,13 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 { ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s16>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[TRUNC]](<16 x s16>), [[DEF]](p1) :: (store 32 into `<16 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] + ; CHECK: S_SETPC_B64_return [[COPY17]] store <16 x i16> %arg0, <16 x i16> addrspace(1)* undef ret void } @@ -750,13 +820,15 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 { define void @void_func_v2f32(<2 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store 8 into `<2 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]] store <2 x float> %arg0, <2 x float> addrspace(1)* undef ret void } @@ -764,14 +836,16 @@ define void @void_func_v2f32(<2 x float> %arg0) #0 { define void 
@void_func_v3f32(<3 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store 12 into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK: S_SETPC_B64_return [[COPY4]] store <3 x float> %arg0, <3 x float> addrspace(1)* undef ret void } @@ -779,15 +853,17 @@ define void @void_func_v3f32(<3 x float> %arg0) #0 { define void @void_func_v4f32(<4 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]] store <4 x float> %arg0, <4 x float> addrspace(1)* undef ret void } @@ -795,7 +871,7 @@ define void @void_func_v4f32(<4 x float> %arg0) #0 { define void @void_func_v8f32(<8 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -804,10 +880,12 @@ define void @void_func_v8f32(<8 x float> %arg0) #0 { ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store 32 into `<8 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK: S_SETPC_B64_return [[COPY9]] store <8 x float> %arg0, <8 x float> addrspace(1)* undef ret void } @@ -815,7 +893,7 @@ define void @void_func_v8f32(<8 x float> %arg0) #0 { define void @void_func_v16f32(<16 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, 
$vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -832,10 +910,12 @@ define void @void_func_v16f32(<16 x float> %arg0) #0 { ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store 64 into `<16 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] + ; CHECK: S_SETPC_B64_return [[COPY17]] store <16 x float> %arg0, <16 x float> addrspace(1)* undef ret void } @@ -843,17 +923,19 @@ define void @void_func_v16f32(<16 x float> %arg0) #0 { define void @void_func_v2f64(<2 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<2 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]] store <2 x double> %arg0, <2 x double> addrspace(1)* undef ret void } @@ -861,20 +943,22 @@ define void @void_func_v2f64(<2 x double> %arg0) #0 { define void @void_func_v3f64(<3 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), 
[[MV1]](s64), [[MV2]](s64) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store 24 into `<3 x double> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; CHECK: S_SETPC_B64_return [[COPY7]] store <3 x double> %arg0, <3 x double> addrspace(1)* undef ret void } @@ -882,7 +966,7 @@ define void @void_func_v3f64(<3 x double> %arg0) #0 { define void @void_func_v4f64(<4 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -891,6 +975,7 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 { ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) @@ -898,7 +983,8 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store 32 into `<4 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK: S_SETPC_B64_return [[COPY9]] store <4 x double> %arg0, <4 x double> addrspace(1)* undef ret void } @@ -906,7 +992,7 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 { define void @void_func_v8f64(<8 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -923,6 +1009,7 @@ define void @void_func_v8f64(<8 x double> %arg0) #0 { ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) @@ -934,7 +1021,8 @@ define void @void_func_v8f64(<8 x double> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store 64 into `<8 x 
double> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] + ; CHECK: S_SETPC_B64_return [[COPY17]] store <8 x double> %arg0, <8 x double> addrspace(1)* undef ret void } @@ -942,7 +1030,7 @@ define void @void_func_v8f64(<8 x double> %arg0) #0 { define void @void_func_v16f64(<16 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -975,6 +1063,7 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 { ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) @@ -994,7 +1083,8 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store 128 into `<16 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] + ; CHECK: S_SETPC_B64_return [[COPY33]] store <16 x double> %arg0, <16 x double> addrspace(1)* undef ret void } @@ -1002,14 +1092,16 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 { define void @void_func_v2f16(<2 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[TRUNC]](<2 x s16>), [[DEF]](p1) :: (store 4 into `<2 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]] store <2 x half> %arg0, <2 x half> addrspace(1)* undef ret void } @@ -1017,15 +1109,17 @@ define void @void_func_v2f16(<2 x half> %arg0) #0 { define void @void_func_v3f16(<3 x half> %arg0) #0 { ; CHECK-LABEL: 
name: void_func_v3f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[TRUNC]](<3 x s16>), [[DEF]](p1) :: (store 6 into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK: S_SETPC_B64_return [[COPY4]] store <3 x half> %arg0, <3 x half> addrspace(1)* undef ret void } @@ -1033,16 +1127,18 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 { define void @void_func_v4f16(<4 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[TRUNC]](<4 x s16>), [[DEF]](p1) :: (store 8 into `<4 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]] store <4 x half> %arg0, <4 x half> addrspace(1)* undef ret void } @@ -1050,7 +1146,7 @@ define void @void_func_v4f16(<4 x half> %arg0) #0 { define void @void_func_v8f16(<8 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1059,11 +1155,13 @@ define void @void_func_v8f16(<8 x half> %arg0) #0 { ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[TRUNC]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<8 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK: S_SETPC_B64_return [[COPY9]] store <8 x half> %arg0, <8 x half> addrspace(1)* undef ret void } @@ -1071,7 +1169,7 @@ define void @void_func_v8f16(<8 x half> %arg0) #0 { define void @void_func_v16f16(<16 x half> %arg0) 
#0 { ; CHECK-LABEL: name: void_func_v16f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1088,11 +1186,13 @@ define void @void_func_v16f16(<16 x half> %arg0) #0 { ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s16>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[TRUNC]](<16 x s16>), [[DEF]](p1) :: (store 32 into `<16 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] + ; CHECK: S_SETPC_B64_return [[COPY17]] store <16 x half> %arg0, <16 x half> addrspace(1)* undef ret void } @@ -1101,18 +1201,20 @@ define void @void_func_v16f16(<16 x half> %arg0) #0 { define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 { ; CHECK-LABEL: name: void_func_i32_i64_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[MV]](s64), [[DEF1]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]] store volatile i32 %arg0, i32 addrspace(1)* undef store volatile i64 %arg1, i64 addrspace(1)* undef store volatile i32 %arg2, i32 addrspace(1)* undef @@ -1122,11 +1224,13 @@ define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 { define void @void_func_struct_i32({ i32 } %arg0) #0 { ; CHECK-LABEL: name: void_func_struct_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `{ i32 } addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: 
[[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] store { i32 } %arg0, { i32 } addrspace(1)* undef ret void } @@ -1134,16 +1238,18 @@ define void @void_func_struct_i32({ i32 } %arg0) #0 { define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 { ; CHECK-LABEL: name: void_func_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[TRUNC]](s8), [[DEF]](p1) :: (store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[DEF]], [[C]](s64) ; CHECK: G_STORE [[COPY1]](s32), [[GEP]](p1) :: (store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]] store { i8, i32 } %arg0, { i8, i32 } addrspace(1)* undef ret void } @@ -1151,8 +1257,10 @@ define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 { define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval %arg0) #0 { ; CHECK-LABEL: name: void_func_byval_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (load 1 from %ir.arg0, align 4, addrspace 5) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 @@ -1162,7 +1270,8 @@ define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval %arg0 ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[DEF]], [[C1]](s64) ; CHECK: G_STORE [[LOAD2]](s32), [[GEP1]](p1) :: (store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]] %arg0.load = load { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 store { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef ret void @@ -1171,12 +1280,13 @@ define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval %arg0 define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval %arg0, { i8, i32 } addrspace(5)* byval %arg1, i32 %arg2) #0 { ; CHECK-LABEL: name: void_func_byval_struct_i8_i32_x2 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF ; 
CHECK: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (volatile load 1 from %ir.arg0, align 4, addrspace 5) @@ -1194,7 +1304,8 @@ define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval %a ; CHECK: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[DEF]], [[C1]](s64) ; CHECK: G_STORE [[LOAD5]](s32), [[GEP3]](p1) :: (volatile store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) ; CHECK: G_STORE [[COPY]](s32), [[DEF1]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] %arg0.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 %arg1.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg1 store volatile { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef @@ -1206,17 +1317,20 @@ define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval %a define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval %arg0, i64 addrspace(5)* byval %arg1) #0 { ; CHECK-LABEL: name: void_func_byval_i32_byval_i64 ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p5) :: (load 4 from %ir.arg0, addrspace 5) ; CHECK: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[LOAD1]](p5) :: (load 8 from %ir.arg1, addrspace 5) ; CHECK: G_STORE [[LOAD2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[LOAD3]](s64), [[DEF1]](p1) :: (store 8 into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]] %arg0.load = load i32, i32 addrspace(5)* %arg0 %arg1.load = load i64, i64 addrspace(5)* %arg1 store i32 %arg0.load, i32 addrspace(1)* undef @@ -1227,7 +1341,7 @@ define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval %arg0, i64 ad define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_i32_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1266,6 +1380,7 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD 
[[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD1]](s32), [[LOAD2]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -1274,7 +1389,8 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[LOAD]](s32), [[DEF1]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[MV]](s64), [[DEF2]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] + ; CHECK: S_SETPC_B64_return [[COPY33]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile i32 %arg1, i32 addrspace(1)* undef store volatile i64 %arg2, i64 addrspace(1)* undef @@ -1285,7 +1401,7 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4) #0 { ; CHECK-LABEL: name: void_func_v32i32_i1_i8_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1326,6 +1442,7 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1 ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 2 from %fixed-stack.1, align 1, addrspace 5) ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 2 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), 
[[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -1337,7 +1454,8 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1 ; CHECK: G_STORE [[LOAD1]](s8), [[DEF2]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[LOAD2]](s16), [[DEF3]](p1) :: (volatile store 2 into `i16 addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[LOAD3]](s16), [[DEF4]](p1) :: (volatile store 2 into `half addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] + ; CHECK: S_SETPC_B64_return [[COPY33]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile i1 %arg1, i1 addrspace(1)* undef store volatile i8 %arg2, i8 addrspace(1)* undef @@ -1349,7 +1467,7 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1 define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v2i32_v2f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1390,6 +1508,7 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD2]](s32), [[LOAD3]](s32) @@ -1399,7 +1518,8 @@ define void 
@void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[DEF1]](p1) :: (volatile store 8 into `<2 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[DEF2]](p1) :: (volatile store 8 into `<2 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] + ; CHECK: S_SETPC_B64_return [[COPY33]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <2 x i32> %arg1, <2 x i32> addrspace(1)* undef store volatile <2 x float> %arg2, <2 x float> addrspace(1)* undef @@ -1409,7 +1529,7 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v2i16_v2f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1450,6 +1570,7 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>) @@ -1461,7 +1582,8 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[TRUNC]](<2 x s16>), [[DEF1]](p1) :: (volatile store 4 into `<2 x i16> addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[TRUNC1]](<2 x s16>), [[DEF2]](p1) :: (volatile store 4 into `<2 x half> addrspace(1)* undef`, 
addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] + ; CHECK: S_SETPC_B64_return [[COPY33]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <2 x i16> %arg1, <2 x i16> addrspace(1)* undef store volatile <2 x half> %arg2, <2 x half> addrspace(1)* undef @@ -1471,7 +1593,7 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v2i64_v2f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1520,6 +1642,7 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) @@ -1533,7 +1656,8 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[DEF1]](p1) :: (volatile store 16 into `<2 x i64> addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[DEF2]](p1) :: (volatile store 16 into `<2 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] + ; CHECK: S_SETPC_B64_return [[COPY33]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <2 x i64> %arg1, <2 x i64> addrspace(1)* undef store volatile <2 x double> %arg2, <2 x double> addrspace(1)* undef @@ -1543,7 +1667,7 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 define void 
@void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v4i32_v4f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1592,6 +1716,7 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) @@ -1601,7 +1726,8 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[DEF1]](p1) :: (volatile store 16 into `<4 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[DEF2]](p1) :: (volatile store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] + ; CHECK: S_SETPC_B64_return [[COPY33]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <4 x i32> %arg1, <4 x i32> addrspace(1)* undef store volatile <4 x float> %arg2, <4 x float> addrspace(1)* undef @@ -1611,7 +1737,7 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v8i32_v8f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, 
$vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1676,6 +1802,7 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) @@ -1685,7 +1812,8 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[DEF1]](p1) :: (volatile store 32 into `<8 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[DEF2]](p1) :: (volatile store 32 into `<8 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] + ; CHECK: S_SETPC_B64_return [[COPY33]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <8 x i32> %arg1, <8 x i32> addrspace(1)* undef store volatile <8 x float> %arg2, <8 x float> addrspace(1)* undef @@ -1695,7 +1823,7 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v16i32_v16f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, 
$vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1792,6 +1920,7 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, ; CHECK: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) ; CHECK: [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD16]](s32), [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32), [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32), [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32), [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32) @@ -1801,7 +1930,8 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[DEF1]](p1) :: (volatile store 64 into `<16 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[DEF2]](p1) :: (volatile store 64 into `<16 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] + ; CHECK: S_SETPC_B64_return [[COPY33]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <16 x i32> %arg1, <16 x i32> addrspace(1)* undef store volatile <16 x float> %arg2, <16 x float> addrspace(1)* undef @@ -1812,11 +1942,12 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 { ; CHECK-LABEL: name: void_func_v3f32_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY 
$sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -1830,7 +1961,8 @@ define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 { ; CHECK: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store 4 into `float addrspace(3)* undef`, addrspace 3) ; CHECK: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store 4 into `float addrspace(3)* undef`, addrspace 3) ; CHECK: G_STORE [[COPY3]](s32), [[DEF1]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]] %arg0.0 = extractelement <3 x float> %arg0, i32 0 %arg0.1 = extractelement <3 x float> %arg0, i32 1 %arg0.2 = extractelement <3 x float> %arg0, i32 2 @@ -1844,11 +1976,12 @@ define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 { define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 { ; CHECK-LABEL: name: void_func_v3i32_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -1861,7 +1994,8 @@ define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 { ; CHECK: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) ; CHECK: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) ; CHECK: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]] %arg0.0 = extractelement <3 x i32> %arg0, i32 0 %arg0.1 = extractelement <3 x i32> %arg0, i32 1 %arg0.2 = extractelement <3 x i32> %arg0, i32 2 @@ -1876,7 +2010,7 @@ define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 { define void @void_func_v16i8(<16 x i8> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1893,11 +2027,13 @@ define void @void_func_v16i8(<16 x i8> %arg0) #0 { ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), 
[[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>) ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[TRUNC]](<16 x s8>), [[DEF]](p1) :: (volatile store 16 into `<16 x i8> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] + ; CHECK: S_SETPC_B64_return [[COPY17]] store volatile <16 x i8> %arg0, <16 x i8> addrspace(1)* undef ret void } @@ -1906,7 +2042,7 @@ define void @void_func_v16i8(<16 x i8> %arg0) #0 { define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CHECK-LABEL: name: void_func_v32i32_v16i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1971,6 +2107,7 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5) ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) + ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<16 x s32>) @@ -1978,7 +2115,8 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: G_STORE [[TRUNC]](<16 x s8>), [[DEF1]](p1) :: (volatile store 16 into `<16 x i8> addrspace(1)* undef`, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK: 
[[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
+  ; CHECK: S_SETPC_B64_return [[COPY33]]
   store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
   store volatile <16 x i8> %arg1, <16 x i8> addrspace(1)* undef
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ret.ll
new file mode 100644
index 000000000000..c6c1a87177ff
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ret.ll
@@ -0,0 +1,3 @@
+; Runs the original SDAG test with -global-isel
+; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %S/../ret.ll | FileCheck -check-prefix=GCN %S/../ret.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %S/../ret.ll | FileCheck -check-prefix=GCN %S/../ret.ll
diff --git a/llvm/test/CodeGen/AMDGPU/ret.ll b/llvm/test/CodeGen/AMDGPU/ret.ll
index e77e21220686..cee224bfa389 100644
--- a/llvm/test/CodeGen/AMDGPU/ret.ll
+++ b/llvm/test/CodeGen/AMDGPU/ret.ll
@@ -79,7 +79,7 @@ bb:
 ; GCN-LABEL: {{^}}ps_input_ena_pos_w:
 ; GCN-DAG: v_mov_b32_e32 v0, v4
 ; GCN-DAG: v_mov_b32_e32 v1, v2
-; GCN: v_mov_b32_e32 v2, v3
+; GCN-DAG: v_mov_b32_e32 v2, v3
 ; GCN-NOT: s_endpgm
 define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
 bb:
@@ -177,8 +177,8 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}sgpr:
-; GCN: s_mov_b32 s2, s3
-; GCN: s_add_i32 s0, s3, 2
+; GCN-DAG: s_mov_b32 s2, s3
+; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
 ; GCN-NOT: s_endpgm
 define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
 bb:
@@ -206,9 +206,9 @@ bb:
 ; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
 ; GCN-DAG: v_mov_b32_e32 v1, v0
 ; GCN-DAG: s_mov_b32 s1, s2
-; GCN: s_waitcnt expcnt(0)
-; GCN: v_add_f32_e32 v0, 1.0, v1
-; GCN-DAG: s_add_i32 s0, s3, 2
+; GCN-DAG: s_waitcnt expcnt(0)
+; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
+; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
 ; GCN-DAG: s_mov_b32 s2, s3
 ; GCN-NOT: s_endpgm
 define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {