AMDGPU/GlobalISel: First pass at attempting to legalize load/stores

There's still a lot more to do, but this handles decomposing accesses due to
alignment. I've gotten it to the point where nothing crashes or sends the
legalizer into an infinite loop.

llvm-svn: 371533
This commit is contained in:
Matt Arsenault 2019-09-10 16:20:14 +00:00
parent 9b23df63ec
commit c0ceca5883
13 changed files with 55829 additions and 1136 deletions

View File

@ -76,6 +76,31 @@ static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) {
};
}
// Mutation for vectors with sub-32-bit elements: grow the element count until
// the vector covers the next multiple of 32 bits. The element count is rounded
// up when the element size does not evenly divide that many bits.
static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT VecTy = Query.Types[TypeIdx];
    const LLT ScalarTy = VecTy.getElementType();

    const int EltBits = ScalarTy.getSizeInBits();
    assert(EltBits < 32);

    // Number of 32-bit units needed to hold the current vector, rounded up.
    const int NumDwords = (static_cast<int>(VecTy.getSizeInBits()) + 31) / 32;
    // Number of elements needed to fill those units, rounded up.
    const int PaddedNumElts = (32 * NumDwords + EltBits - 1) / EltBits;

    return std::make_pair(TypeIdx, LLT::vector(PaddedNumElts, ScalarTy));
  };
}
// Predicate: the type at TypeIdx is a vector whose total size in bits is
// strictly less than Size.
static LegalityPredicate vectorSmallerThan(unsigned TypeIdx, unsigned Size) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    if (!Ty.isVector())
      return false;
    return Ty.getSizeInBits() < Size;
  };
}
static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size) {
return [=](const LegalityQuery &Query) {
const LLT QueryTy = Query.Types[TypeIdx];
@ -112,6 +137,14 @@ static LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT Type) {
};
}
// Predicate: the type at TypeIdx is not a vector, is wider than 32 bits, and
// the first memory operand accesses fewer bits than the type holds.
static LegalityPredicate isWideScalarTruncStore(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    if (Ty.isVector())
      return false;

    const unsigned RegBits = Ty.getSizeInBits();
    // Only consult the memory operand once the register type is known to be
    // wider than a dword.
    return RegBits > 32 && Query.MMODescrs[0].SizeInBits < RegBits;
  };
}
AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const GCNTargetMachine &TM)
: ST(ST_) {
@ -126,6 +159,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const LLT S16 = LLT::scalar(16);
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
const LLT S96 = LLT::scalar(96);
const LLT S128 = LLT::scalar(128);
const LLT S256 = LLT::scalar(256);
const LLT S512 = LLT::scalar(512);
@ -246,7 +280,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.legalForCartesianProduct({S64, V2S32, V4S16})
.legalForCartesianProduct({V2S64, V4S32})
// Don't worry about the size constraint.
.legalIf(all(isPointer(0), isPointer(1)));
.legalIf(all(isPointer(0), isPointer(1)))
// FIXME: Testing hack
.legalForCartesianProduct({S16, LLT::vector(2, 8), });
getActionDefinitionsBuilder(G_FCONSTANT)
.legalFor({S32, S64, S16})
@ -358,6 +394,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
.legalFor({{S64, S32}, {S32, S16}, {S64, S16},
{S32, S1}, {S64, S1}, {S16, S1},
{S96, S32},
// FIXME: Hack
{S64, LLT::scalar(33)},
{S32, S8}, {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}})
@ -523,79 +560,229 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
// handle some operations by just promoting the register during
// selection. There are also d16 loads on GFX9+ which preserve the high bits.
getActionDefinitionsBuilder({G_LOAD, G_STORE})
.narrowScalarIf([](const LegalityQuery &Query) {
unsigned Size = Query.Types[0].getSizeInBits();
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
return (Size > 32 && MemSize < Size);
},
[](const LegalityQuery &Query) {
return std::make_pair(0, LLT::scalar(32));
})
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
.fewerElementsIf([=](const LegalityQuery &Query) {
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
return (MemSize == 96) &&
Query.Types[0].isVector() &&
!ST.hasDwordx3LoadStores();
},
[=](const LegalityQuery &Query) {
return std::make_pair(0, V2S32);
})
.legalIf([=](const LegalityQuery &Query) {
const LLT &Ty0 = Query.Types[0];
auto maxSizeForAddrSpace = [this](unsigned AS) -> unsigned {
switch (AS) {
// FIXME: Private element size.
case AMDGPUAS::PRIVATE_ADDRESS:
return 32;
// FIXME: Check subtarget
case AMDGPUAS::LOCAL_ADDRESS:
return ST.useDS128() ? 128 : 64;
unsigned Size = Ty0.getSizeInBits();
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
if (Size < 32 || (Size > 32 && MemSize < Size))
return false;
// Treat constant and global as identical. SMRD loads are sometimes usable
// for global loads (ideally constant address space should be eliminated)
// depending on the context. Legality cannot be context dependent, but
// RegBankSelect can split the load as necessary depending on the pointer
// register bank/uniformity and if the memory is invariant or not written in
// a kernel.
case AMDGPUAS::CONSTANT_ADDRESS:
case AMDGPUAS::GLOBAL_ADDRESS:
return 512;
default:
return 128;
}
};
if (Ty0.isVector() && Size != MemSize)
return false;
const auto needToSplitLoad = [=](const LegalityQuery &Query) -> bool {
const LLT DstTy = Query.Types[0];
// TODO: Decompose private loads into 4-byte components.
// TODO: Illegal flat loads on SI
switch (MemSize) {
case 8:
case 16:
return Size == 32;
case 32:
case 64:
case 128:
return true;
// Split vector extloads.
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
if (DstTy.isVector() && DstTy.getSizeInBits() > MemSize)
return true;
case 96:
return ST.hasDwordx3LoadStores();
const LLT PtrTy = Query.Types[1];
unsigned AS = PtrTy.getAddressSpace();
if (MemSize > maxSizeForAddrSpace(AS))
return true;
case 256:
case 512:
// TODO: Possibly support loads of i256 and i512 . This will require
// adding i256 and i512 types to MVT in order for to be able to use
// TableGen.
// TODO: Add support for other vector types, this will require
// defining more value mappings for the new types.
return Ty0.isVector() && (Ty0.getScalarType().getSizeInBits() == 32 ||
Ty0.getScalarType().getSizeInBits() == 64);
// Catch weird sized loads that don't evenly divide into the access sizes
// TODO: May be able to widen depending on alignment etc.
unsigned NumRegs = MemSize / 32;
if (NumRegs == 3 && !ST.hasDwordx3LoadStores())
return true;
default:
return false;
}
})
.clampScalar(0, S32, S64);
unsigned Align = Query.MMODescrs[0].AlignInBits;
if (Align < MemSize) {
const SITargetLowering *TLI = ST.getTargetLowering();
return !TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8);
}
return false;
};
unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32;
unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16;
unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8;
// TODO: Refine based on subtargets which support unaligned access or 128-bit
// LDS
// TODO: Unsupported flat for SI.
for (unsigned Op : {G_LOAD, G_STORE}) {
const bool IsStore = Op == G_STORE;
auto &Actions = getActionDefinitionsBuilder(Op);
// Whitelist the common cases.
// TODO: Pointer loads
// TODO: Wide constant loads
// TODO: Only CI+ has 3x loads
// TODO: Loads to s16 on gfx9
Actions.legalForTypesWithMemDesc({{S32, GlobalPtr, 32, GlobalAlign32},
{V2S32, GlobalPtr, 64, GlobalAlign32},
{V3S32, GlobalPtr, 96, GlobalAlign32},
{S96, GlobalPtr, 96, GlobalAlign32},
{V4S32, GlobalPtr, 128, GlobalAlign32},
{S128, GlobalPtr, 128, GlobalAlign32},
{S64, GlobalPtr, 64, GlobalAlign32},
{V2S64, GlobalPtr, 128, GlobalAlign32},
{V2S16, GlobalPtr, 32, GlobalAlign32},
{S32, GlobalPtr, 8, GlobalAlign8},
{S32, GlobalPtr, 16, GlobalAlign16},
{S32, LocalPtr, 32, 32},
{S64, LocalPtr, 64, 32},
{V2S32, LocalPtr, 64, 32},
{S32, LocalPtr, 8, 8},
{S32, LocalPtr, 16, 16},
{V2S16, LocalPtr, 32, 32},
{S32, PrivatePtr, 32, 32},
{S32, PrivatePtr, 8, 8},
{S32, PrivatePtr, 16, 16},
{V2S16, PrivatePtr, 32, 32},
{S32, FlatPtr, 32, GlobalAlign32},
{S32, FlatPtr, 16, GlobalAlign16},
{S32, FlatPtr, 8, GlobalAlign8},
{V2S16, FlatPtr, 32, GlobalAlign32},
{S32, ConstantPtr, 32, GlobalAlign32},
{V2S32, ConstantPtr, 64, GlobalAlign32},
{V3S32, ConstantPtr, 96, GlobalAlign32},
{V4S32, ConstantPtr, 128, GlobalAlign32},
{S64, ConstantPtr, 64, GlobalAlign32},
{S128, ConstantPtr, 128, GlobalAlign32},
{V2S32, ConstantPtr, 32, GlobalAlign32}});
Actions
.narrowScalarIf(
[=](const LegalityQuery &Query) -> bool {
return !Query.Types[0].isVector() && needToSplitLoad(Query);
},
[=](const LegalityQuery &Query) -> std::pair<unsigned, LLT> {
const LLT DstTy = Query.Types[0];
const LLT PtrTy = Query.Types[1];
const unsigned DstSize = DstTy.getSizeInBits();
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
// Split extloads.
if (DstSize > MemSize)
return std::make_pair(0, LLT::scalar(MemSize));
if (DstSize > 32 && (DstSize % 32 != 0)) {
// FIXME: Need a way to specify non-extload of larger size if
// suitably aligned.
return std::make_pair(0, LLT::scalar(32 * (DstSize / 32)));
}
unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace());
if (MemSize > MaxSize)
return std::make_pair(0, LLT::scalar(MaxSize));
unsigned Align = Query.MMODescrs[0].AlignInBits;
return std::make_pair(0, LLT::scalar(Align));
})
.fewerElementsIf(
[=](const LegalityQuery &Query) -> bool {
return Query.Types[0].isVector() && needToSplitLoad(Query);
},
[=](const LegalityQuery &Query) -> std::pair<unsigned, LLT> {
const LLT DstTy = Query.Types[0];
const LLT PtrTy = Query.Types[1];
LLT EltTy = DstTy.getElementType();
unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace());
// Split if it's too large for the address space.
if (Query.MMODescrs[0].SizeInBits > MaxSize) {
unsigned NumElts = DstTy.getNumElements();
unsigned NumPieces = Query.MMODescrs[0].SizeInBits / MaxSize;
// FIXME: Refine when odd breakdowns handled
// The scalars will need to be re-legalized.
if (NumPieces == 1 || NumPieces >= NumElts ||
NumElts % NumPieces != 0)
return std::make_pair(0, EltTy);
return std::make_pair(0,
LLT::vector(NumElts / NumPieces, EltTy));
}
// Need to split because of alignment.
unsigned Align = Query.MMODescrs[0].AlignInBits;
unsigned EltSize = EltTy.getSizeInBits();
if (EltSize > Align &&
(EltSize / Align < DstTy.getNumElements())) {
return std::make_pair(0, LLT::vector(EltSize / Align, EltTy));
}
// May need relegalization for the scalars.
return std::make_pair(0, EltTy);
})
.minScalar(0, S32);
if (IsStore)
Actions.narrowScalarIf(isWideScalarTruncStore(0), changeTo(0, S32));
// TODO: Need a bitcast lower option?
Actions
.legalIf([=](const LegalityQuery &Query) {
const LLT Ty0 = Query.Types[0];
unsigned Size = Ty0.getSizeInBits();
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
unsigned Align = Query.MMODescrs[0].AlignInBits;
// No extending vector loads.
if (Size > MemSize && Ty0.isVector())
return false;
// FIXME: Widening store from alignment not valid.
if (MemSize < Size)
MemSize = std::max(MemSize, Align);
switch (MemSize) {
case 8:
case 16:
return Size == 32;
case 32:
case 64:
case 128:
return true;
case 96:
return ST.hasDwordx3LoadStores();
case 256:
case 512:
return true;
default:
return false;
}
})
.widenScalarToNextPow2(0)
// TODO: v3s32->v4s32 with alignment
.moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0));
}
// FIXME: Handle alignment requirements.
auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
.legalForTypesWithMemDesc({
{S32, GlobalPtr, 8, 8},
{S32, GlobalPtr, 16, 8},
{S32, LocalPtr, 8, 8},
{S32, LocalPtr, 16, 8},
{S32, PrivatePtr, 8, 8},
{S32, PrivatePtr, 16, 8}});
.legalForTypesWithMemDesc({{S32, GlobalPtr, 8, 8},
{S32, GlobalPtr, 16, 2 * 8},
{S32, LocalPtr, 8, 8},
{S32, LocalPtr, 16, 16},
{S32, PrivatePtr, 8, 8},
{S32, PrivatePtr, 16, 16}});
if (ST.hasFlatAddressSpace()) {
ExtLoads.legalForTypesWithMemDesc({{S32, FlatPtr, 8, 8},
{S32, FlatPtr, 16, 8}});
ExtLoads.legalForTypesWithMemDesc(
{{S32, FlatPtr, 8, 8}, {S32, FlatPtr, 16, 16}});
}
ExtLoads.clampScalar(0, S32, S32)

View File

@ -1230,21 +1230,12 @@ bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
return true;
}
bool SITargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
bool *IsFast) const {
bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
unsigned Size, unsigned AddrSpace, unsigned Align,
MachineMemOperand::Flags Flags, bool *IsFast) const {
if (IsFast)
*IsFast = false;
// TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
// which isn't a simple VT.
// Until MVT is extended to handle this, simply check for the size and
// rely on the condition below: allow accesses if the size is a multiple of 4.
if (VT == MVT::Other || (VT != MVT::Other && VT.getSizeInBits() > 1024 &&
VT.getStoreSize() > 16)) {
return false;
}
if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
AddrSpace == AMDGPUAS::REGION_ADDRESS) {
// ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
@ -1283,7 +1274,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
}
// Smaller than dword value must be aligned.
if (VT.bitsLT(MVT::i32))
if (Size < 32)
return false;
// 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
@ -1292,7 +1283,26 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
if (IsFast)
*IsFast = true;
return VT.bitsGT(MVT::i32) && Align % 4 == 0;
return Size >= 32 && Align >= 4;
}
// EVT-based entry point: rejects types that cannot be handled here, then
// defers to the size-based allowsMisalignedMemoryAccessesImpl.
bool SITargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
    bool *IsFast) const {
  // Default the "fast" result to false for every early exit below.
  if (IsFast)
    *IsFast = false;

  // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
  // which isn't a simple VT.
  // Until MVT is extended to handle this, only look at the size here and rely
  // on the size/alignment checks in allowsMisalignedMemoryAccessesImpl.
  const bool IsOtherVT = VT == MVT::Other;
  if (IsOtherVT || (VT.getSizeInBits() > 1024 && VT.getStoreSize() > 16))
    return false;

  const unsigned SizeInBits = VT.getSizeInBits();
  return allowsMisalignedMemoryAccessesImpl(SizeInBits, AddrSpace, Align, Flags,
                                            IsFast);
}
EVT SITargetLowering::getOptimalMemOpType(

View File

@ -238,6 +238,11 @@ public:
bool canMergeStoresTo(unsigned AS, EVT MemVT,
const SelectionDAG &DAG) const override;
// Size-based variant of allowsMisalignedMemoryAccesses for callers that have
// no EVT for the access (the GlobalISel legalizer calls it with a memory size).
// Size is the access size in bits and AS is the address space of the pointer.
// Align appears to be in bytes: the legalizer passes its alignment-in-bits
// divided by 8.
// If IsFast is non-null, the implementation writes to it; it is reset to false
// on entry.
bool allowsMisalignedMemoryAccessesImpl(
unsigned Size, unsigned AS, unsigned Align,
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *IsFast = nullptr) const;
bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AS, unsigned Align,
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,

View File

@ -0,0 +1,229 @@
# RUN: llc -march=amdgcn -mcpu=hawaii -mattr=+enable-ds128 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7-DS128 %s
---
name: load_local_v4s32_align16
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_local_v4s32_align16
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX6: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, addrspace 3)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
; GFX7-LABEL: name: load_local_v4s32_align16
; GFX7: liveins: $vgpr0
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, addrspace 3)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
; GFX7-DS128-LABEL: name: load_local_v4s32_align16
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[DS_READ_B128_:%[0-9]+]]:vreg_128 = DS_READ_B128 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 16, addrspace 3)
; GFX7-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_]]
; GFX9-LABEL: name: load_local_v4s32_align16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, addrspace 3)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 16, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
---
name: load_local_v4s32_align_4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_local_v4s32_align_4
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX6: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
; GFX7-LABEL: name: load_local_v4s32_align_4
; GFX7: liveins: $vgpr0
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
; GFX7-DS128-LABEL: name: load_local_v4s32_align_4
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX7-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
; GFX9-LABEL: name: load_local_v4s32_align_4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
---
name: load_local_v2s64
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_local_v2s64
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX6: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
; GFX7-LABEL: name: load_local_v2s64
; GFX7: liveins: $vgpr0
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
; GFX7-DS128-LABEL: name: load_local_v2s64
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX7-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
; GFX9-LABEL: name: load_local_v2s64
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
---
name: load_local_v2p1
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_local_v2p1
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX6: [[LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
; GFX7-LABEL: name: load_local_v2p1
; GFX7: liveins: $vgpr0
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7: [[LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
; GFX7-DS128-LABEL: name: load_local_v2p1
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX7-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
; GFX9-LABEL: name: load_local_v2p1
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
---
name: load_local_s128
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_local_s128
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX6: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
; GFX7-LABEL: name: load_local_s128
; GFX7: liveins: $vgpr0
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
; GFX7-DS128-LABEL: name: load_local_s128
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX7-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
; GFX9-LABEL: name: load_local_s128
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
---
name: load_local_v8s16
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_local_v8s16
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX6: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
; GFX7-LABEL: name: load_local_v8s16
; GFX7: liveins: $vgpr0
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
; GFX7-DS128-LABEL: name: load_local_v8s16
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX7-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
; GFX9-LABEL: name: load_local_v8s16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...

View File

@ -5,7 +5,6 @@
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
---
name: load_local_s32_from_4
@ -29,6 +28,12 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
; GFX7: $vgpr0 = COPY [[DS_READ_B32_]]
; GFX7-DS128-LABEL: name: load_local_s32_from_4
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
; GFX7-DS128: $vgpr0 = COPY [[DS_READ_B32_]]
; GFX9-LABEL: name: load_local_s32_from_4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -63,6 +68,12 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 2, addrspace 3)
; GFX7: $vgpr0 = COPY [[DS_READ_U16_]]
; GFX7-DS128-LABEL: name: load_local_s32_from_2
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 2, addrspace 3)
; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U16_]]
; GFX9-LABEL: name: load_local_s32_from_2
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -101,6 +112,12 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
; GFX7-DS128-LABEL: name: load_local_s32_from_1
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U8_]]
; GFX9-LABEL: name: load_local_s32_from_1
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -135,6 +152,12 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
; GFX7-DS128-LABEL: name: load_local_v2s32
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
; GFX7-DS128: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
; GFX9-LABEL: name: load_local_v2s32
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -169,6 +192,12 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
; GFX7-DS128-LABEL: name: load_local_v2s32_align4
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
; GFX9-LABEL: name: load_local_v2s32_align4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
@ -182,107 +211,6 @@ body: |
---
name: load_local_v3s32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_local_v3s32
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX6: [[LOAD:%[0-9]+]]:{{vgpr|vreg_96}}(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
; GFX7-LABEL: name: load_local_v3s32
; GFX7: liveins: $vgpr0
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[LOAD:%[0-9]+]]:vreg_96(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
; GFX9-LABEL: name: load_local_v3s32
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vreg_96(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 3)
$vgpr0_vgpr1_vgpr2 = COPY %1
...
---
name: load_local_v4s32_align16
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_local_v4s32_align16
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: $m0 = S_MOV_B32 -1
; GFX6: [[DS_READ_B128_:%[0-9]+]]:vreg_128 = DS_READ_B128 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 16, addrspace 3)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_]]
; GFX7-LABEL: name: load_local_v4s32_align16
; GFX7: liveins: $vgpr0
; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_B128_:%[0-9]+]]:vreg_128 = DS_READ_B128 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 16, addrspace 3)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_]]
; GFX9-LABEL: name: load_local_v4s32_align16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 16, addrspace 3)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]]
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 16, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
---
name: load_local_v4s32_align_4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_local_v4s32_align_4
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX6: $m0 = S_MOV_B32 -1
; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
; GFX7-LABEL: name: load_local_v4s32_align_4
; GFX7: liveins: $vgpr0
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
; GFX9-LABEL: name: load_local_v4s32_align_4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
---
name: load_local_s64
legalized: true
regBankSelected: true
@ -304,6 +232,12 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
; GFX7-DS128-LABEL: name: load_local_s64
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
; GFX7-DS128: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
; GFX9-LABEL: name: load_local_s64
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -338,6 +272,12 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
; GFX7-DS128-LABEL: name: load_local_s64_align4
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
; GFX9-LABEL: name: load_local_s64_align4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
@ -351,141 +291,6 @@ body: |
---
name: load_local_v2s64
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_local_v2s64
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX6: $m0 = S_MOV_B32 -1
; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
; GFX7-LABEL: name: load_local_v2s64
; GFX7: liveins: $vgpr0
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
; GFX9-LABEL: name: load_local_v2s64
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
---
name: load_local_v2p1
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_local_v2p1
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX6: $m0 = S_MOV_B32 -1
; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
; GFX7-LABEL: name: load_local_v2p1
; GFX7: liveins: $vgpr0
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
; GFX9-LABEL: name: load_local_v2p1
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
---
name: load_local_s96
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_local_s96
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX6: [[LOAD:%[0-9]+]]:{{vgpr|vreg_96}}(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
; GFX7-LABEL: name: load_local_s96
; GFX7: liveins: $vgpr0
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
; GFX9-LABEL: name: load_local_s96
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 3)
$vgpr0_vgpr1_vgpr2 = COPY %1
...
---
name: load_local_s128
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_local_s128
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX6: $m0 = S_MOV_B32 -1
; GFX6: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
; GFX7-LABEL: name: load_local_s128
; GFX7: liveins: $vgpr0
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
; GFX9-LABEL: name: load_local_s128
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
---
name: load_local_p3_from_4
legalized: true
regBankSelected: true
@ -507,6 +312,12 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
; GFX7: $vgpr0 = COPY [[DS_READ_B32_]]
; GFX7-DS128-LABEL: name: load_local_p3_from_4
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
; GFX7-DS128: $vgpr0 = COPY [[DS_READ_B32_]]
; GFX9-LABEL: name: load_local_p3_from_4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -541,6 +352,12 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
; GFX7: $vgpr0 = COPY [[DS_READ_B32_]]
; GFX7-DS128-LABEL: name: load_local_p5_from_4
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
; GFX7-DS128: $vgpr0 = COPY [[DS_READ_B32_]]
; GFX9-LABEL: name: load_local_p5_from_4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -575,6 +392,12 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
; GFX7-DS128-LABEL: name: load_local_p1_align8
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
; GFX7-DS128: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
; GFX9-LABEL: name: load_local_p1_align8
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -609,6 +432,12 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
; GFX7-DS128-LABEL: name: load_local_p1_align4
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
; GFX9-LABEL: name: load_local_p1_align4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
@ -643,6 +472,12 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
; GFX7-DS128-LABEL: name: load_local_p999_from_8
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
; GFX9-LABEL: name: load_local_p999_from_8
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
@ -677,6 +512,12 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; GFX7-DS128-LABEL: name: load_local_v2p3
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; GFX9-LABEL: name: load_local_v2p3
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
@ -711,6 +552,12 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
; GFX7: $vgpr0 = COPY [[DS_READ_B32_]]
; GFX7-DS128-LABEL: name: load_local_v2s16
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
; GFX7-DS128: $vgpr0 = COPY [[DS_READ_B32_]]
; GFX9-LABEL: name: load_local_v2s16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -745,6 +592,12 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
; GFX7-DS128-LABEL: name: load_local_v4s16
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
; GFX7-DS128: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
; GFX9-LABEL: name: load_local_v4s16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -777,40 +630,6 @@ body: |
# ...
---
name: load_local_v8s16
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_local_v8s16
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX6: $m0 = S_MOV_B32 -1
; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
; GFX7-LABEL: name: load_local_v8s16
; GFX7: liveins: $vgpr0
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
; GFX9-LABEL: name: load_local_v8s16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
################################################################################
### Stress addressing modes
################################################################################
@ -840,6 +659,12 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
; GFX7-DS128-LABEL: name: load_local_s32_from_1_gep_65535
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U8_]]
; GFX9-LABEL: name: load_local_s32_from_1_gep_65535
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -925,6 +750,14 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
; GFX7-DS128-LABEL: name: load_local_s32_from_1_gep_65536
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-DS128: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec
; GFX7-DS128: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U8_]]
; GFX9-LABEL: name: load_local_s32_from_1_gep_65536
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@ -967,6 +800,14 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
; GFX7-DS128-LABEL: name: load_local_s32_from_1_gep_m1
; GFX7-DS128: liveins: $vgpr0
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-DS128: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
; GFX7-DS128: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX7-DS128: $m0 = S_MOV_B32 -1
; GFX7-DS128: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U8_]]
; GFX9-LABEL: name: load_local_s32_from_1_gep_m1
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0

View File

@ -97,193 +97,6 @@ body: |
---
name: load_private_v2s32
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
scratchWaveOffsetReg: $sgpr4
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
liveins: $vgpr0
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORDX2_]]
; GFX6-LABEL: name: load_private_v2s32
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 5)
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]]
; GFX9-LABEL: name: load_private_v2s32
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 5)
; GFX9: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 5)
$vgpr0_vgpr1 = COPY %1
...
---
name: load_private_v4s32
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
scratchWaveOffsetReg: $sgpr4
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_private_v4s32
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 5)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]]
; GFX9-LABEL: name: load_private_v4s32
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 5)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 5)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
---
name: load_private_s64
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
scratchWaveOffsetReg: $sgpr4
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_private_s64
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
; GFX9-LABEL: name: load_private_s64
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 5)
$vgpr0_vgpr1 = COPY %1
...
---
name: load_private_v2s64
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
scratchWaveOffsetReg: $sgpr4
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_private_v2s64
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
; GFX9-LABEL: name: load_private_v2s64
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 5)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
---
name: load_private_v2p1
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
scratchWaveOffsetReg: $sgpr4
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_private_v2p1
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
; GFX9-LABEL: name: load_private_v2p1
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 5)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
---
name: load_private_s128
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
scratchWaveOffsetReg: $sgpr4
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_private_s128
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
; GFX6: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
; GFX9-LABEL: name: load_private_s128
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 5)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
---
name: load_private_p3_from_4
legalized: true
regBankSelected: true
@ -342,68 +155,6 @@ body: |
---
name: load_private_p999_from_8
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
scratchWaveOffsetReg: $sgpr4
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_private_p999_from_8
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
; GFX9-LABEL: name: load_private_p999_from_8
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(p999) = G_LOAD %0 :: (load 8, align 8, addrspace 5)
$vgpr0_vgpr1 = COPY %1
...
---
name: load_private_v2p3
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
scratchWaveOffsetReg: $sgpr4
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_private_v2p3
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; GFX9-LABEL: name: load_private_v2p3
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 5)
$vgpr0_vgpr1 = COPY %1
...
---
name: load_private_v2s16
legalized: true
regBankSelected: true
@ -433,85 +184,6 @@ body: |
...
---
name: load_private_v4s16
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_private_v4s16
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
; GFX9-LABEL: name: load_private_v4s16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 5)
$vgpr0_vgpr1 = COPY %1
...
# ---
# name: load_private_v6s16
# legalized: true
# regBankSelected: true
# tracksRegLiveness: true
# machineFunctionInfo:
# scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
# scratchWaveOffsetReg: $sgpr4
# stackPtrOffsetReg: $sgpr32
# body: |
# bb.0:
# liveins: $vgpr0
# %0:vgpr(p5) = COPY $vgpr0
# %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load 12, align 4, addrspace 5)
# $vgpr0_vgpr1_vgpr2 = COPY %1
# ...
---
name: load_private_v8s16
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
scratchWaveOffsetReg: $sgpr4
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
liveins: $vgpr0
; GFX6-LABEL: name: load_private_v8s16
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
; GFX9-LABEL: name: load_private_v8s16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 5)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
################################################################################
### Stress addressing modes
################################################################################

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,443 +0,0 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
---
name: test_load_global_i32
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_load_global_i32
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
; SI: $vgpr0 = COPY [[LOAD]](s32)
; VI-LABEL: name: test_load_global_i32
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
; VI: $vgpr0 = COPY [[LOAD]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_LOAD %0 :: (load 4, addrspace 1)
$vgpr0 = COPY %1
...
---
name: test_load_global_i64
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_load_global_i64
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
; SI: $vgpr0 = COPY [[LOAD]](s32)
; VI-LABEL: name: test_load_global_i64
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
; VI: $vgpr0 = COPY [[LOAD]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_LOAD %0 :: (load 4, addrspace 1)
$vgpr0 = COPY %1
...
---
name: test_load_global_p1
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_load_global_p1
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
; VI-LABEL: name: test_load_global_p1
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(p1) = G_LOAD %0 :: (load 8, addrspace 1)
$vgpr0_vgpr1 = COPY %1
...
---
name: test_load_global_p4
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_load_global_p4
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
; VI-LABEL: name: test_load_global_p4
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(p4) = G_LOAD %0 :: (load 8, addrspace 1)
$vgpr0_vgpr1 = COPY %1
...
---
name: test_load_global_p3
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_load_global_p3
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
; SI: $vgpr0 = COPY [[LOAD]](p3)
; VI-LABEL: name: test_load_global_p3
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
; VI: $vgpr0 = COPY [[LOAD]](p3)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(p3) = G_LOAD %0 :: (load 4, addrspace 1)
$vgpr0 = COPY %1
...
---
name: test_load_global_v2s32
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_load_global_v2s32
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
; VI-LABEL: name: test_load_global_v2s32
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x s32>) = G_LOAD %0 :: (load 8, addrspace 1)
$vgpr0_vgpr1 = COPY %1
...
---
name: test_load_global_v2s16
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_load_global_v2s16
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
; SI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
; VI-LABEL: name: test_load_global_v2s16
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
; VI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x s16>) = G_LOAD %0 :: (load 4, addrspace 1)
$vgpr0 = COPY %1
...
---
name: test_load_global_v3i32
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_load_global_v3i32
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 4, addrspace 1)
; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
; VI-LABEL: name: test_load_global_v3i32
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 1)
$vgpr0_vgpr1_vgpr2 = COPY %1
...
---
name: test_ext_load_global_s64_from_1_align1
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_ext_load_global_s64_from_1_align1
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
; VI-LABEL: name: test_ext_load_global_s64_from_1_align1
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s64) = G_LOAD %0 :: (load 1, addrspace 1, align 4)
$vgpr0_vgpr1 = COPY %1
...
---
name: test_ext_load_global_s64_from_2_align2
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_ext_load_global_s64_from_2_align2
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
; VI-LABEL: name: test_ext_load_global_s64_from_2_align2
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s64) = G_LOAD %0 :: (load 2, addrspace 1, align 4)
$vgpr0_vgpr1 = COPY %1
...
---
name: test_ext_load_global_s64_from_4_align4
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_ext_load_global_s64_from_4_align4
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
; VI-LABEL: name: test_ext_load_global_s64_from_4_align4
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s64) = G_LOAD %0 :: (load 4, addrspace 1, align 4)
$vgpr0_vgpr1 = COPY %1
...
---
# Extending global load: the s128 G_LOAD only reads 4 bytes (align 4,
# addrspace 1). Both the SI and VI check lines expect an s32 G_LOAD followed
# by a single G_ANYEXT directly to s128.
name: test_ext_load_global_s128_from_4_align4
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_ext_load_global_s128_from_4_align4
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
; SI: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s32)
; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT]](s128)
; VI-LABEL: name: test_ext_load_global_s128_from_4_align4
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
; VI: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s32)
; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT]](s128)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s128) = G_LOAD %0 :: (load 4, addrspace 1, align 4)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
...
---
# 12-byte (s96) global load with 4-byte alignment.
#  - SI check lines expect the load to be split into an s64 load at offset 0
#    and an s32 load at offset 8, recombined into an s96 value with G_INSERT
#    at bit offsets 0 and 64.
#  - VI check lines expect the original s96 load to be left as a single load.
name: test_load_global_s96_align4
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_load_global_s96_align4
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 4, addrspace 1)
; SI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
; SI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0
; SI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
; VI-LABEL: name: test_load_global_s96_align4
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s96) = G_LOAD %0 :: (load 12, addrspace 1, align 4)
$vgpr0_vgpr1_vgpr2 = COPY %1
...
---
# 20-byte (s160) global load with 4-byte alignment. The SI and VI check lines
# are identical: the load is expected to be split into two s64 loads (byte
# offsets 0 and 8) and one s32 load (byte offset 16), recombined into an s160
# value with G_INSERT at bit offsets 0, 64 and 128. The result is only kept
# alive through an S_NOP implicit use rather than a register copy.
name: test_load_global_s160_align4
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_load_global_s160_align4
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1)
; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 4, addrspace 1)
; SI: [[DEF:%[0-9]+]]:_(s160) = G_IMPLICIT_DEF
; SI: [[INSERT:%[0-9]+]]:_(s160) = G_INSERT [[DEF]], [[LOAD]](s64), 0
; SI: [[INSERT1:%[0-9]+]]:_(s160) = G_INSERT [[INSERT]], [[LOAD1]](s64), 64
; SI: [[INSERT2:%[0-9]+]]:_(s160) = G_INSERT [[INSERT1]], [[LOAD2]](s32), 128
; SI: S_NOP 0, implicit [[INSERT2]](s160)
; VI-LABEL: name: test_load_global_s160_align4
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1)
; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 4, addrspace 1)
; VI: [[DEF:%[0-9]+]]:_(s160) = G_IMPLICIT_DEF
; VI: [[INSERT:%[0-9]+]]:_(s160) = G_INSERT [[DEF]], [[LOAD]](s64), 0
; VI: [[INSERT1:%[0-9]+]]:_(s160) = G_INSERT [[INSERT]], [[LOAD1]](s64), 64
; VI: [[INSERT2:%[0-9]+]]:_(s160) = G_INSERT [[INSERT1]], [[LOAD2]](s32), 128
; VI: S_NOP 0, implicit [[INSERT2]](s160)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s160) = G_LOAD %0 :: (load 20, addrspace 1, align 4)
S_NOP 0, implicit %1
...
---
# 28-byte (s224) global load with 4-byte alignment. The SI and VI check lines
# are identical: the load is expected to be split into three s64 loads (byte
# offsets 0, 8, 16) and one s32 load (byte offset 24), recombined into an s224
# value with G_INSERT at bit offsets 0, 64, 128 and 192. The input then
# inserts the s224 result into an undefined s256 so it can be copied out
# through an 8-register VGPR tuple.
name: test_load_global_s224_align4
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_load_global_s224_align4
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1)
; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
; SI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p1) :: (load 8, align 4, addrspace 1)
; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
; SI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64)
; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 4, addrspace 1)
; SI: [[DEF:%[0-9]+]]:_(s224) = G_IMPLICIT_DEF
; SI: [[INSERT:%[0-9]+]]:_(s224) = G_INSERT [[DEF]], [[LOAD]](s64), 0
; SI: [[INSERT1:%[0-9]+]]:_(s224) = G_INSERT [[INSERT]], [[LOAD1]](s64), 64
; SI: [[INSERT2:%[0-9]+]]:_(s224) = G_INSERT [[INSERT1]], [[LOAD2]](s64), 128
; SI: [[INSERT3:%[0-9]+]]:_(s224) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 192
; SI: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
; SI: [[INSERT4:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[INSERT3]](s224), 0
; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT4]](s256)
; VI-LABEL: name: test_load_global_s224_align4
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1)
; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
; VI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p1) :: (load 8, align 4, addrspace 1)
; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
; VI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64)
; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 4, addrspace 1)
; VI: [[DEF:%[0-9]+]]:_(s224) = G_IMPLICIT_DEF
; VI: [[INSERT:%[0-9]+]]:_(s224) = G_INSERT [[DEF]], [[LOAD]](s64), 0
; VI: [[INSERT1:%[0-9]+]]:_(s224) = G_INSERT [[INSERT]], [[LOAD1]](s64), 64
; VI: [[INSERT2:%[0-9]+]]:_(s224) = G_INSERT [[INSERT1]], [[LOAD2]](s64), 128
; VI: [[INSERT3:%[0-9]+]]:_(s224) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 192
; VI: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
; VI: [[INSERT4:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[INSERT3]](s224), 0
; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT4]](s256)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s224) = G_LOAD %0 :: (load 28, addrspace 1, align 4)
%2:_(s256) = G_IMPLICIT_DEF
%3:_(s256) = G_INSERT %2, %1, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3
...
---
# 12-byte <3 x s32> global load with 16-byte alignment.
#  - SI check lines expect a <2 x s32> load at offset 0 plus an s32 load at
#    offset 8, recombined into <3 x s32> with G_INSERT at bit offsets 0 and 64.
#  - VI check lines expect the <3 x s32> load to be left as a single load.
name: test_load_global_v3s32
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_load_global_v3s32
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 16, addrspace 1)
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 4, align 8, addrspace 1)
; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
; VI-LABEL: name: test_load_global_v3s32
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1)
; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<3 x s32>) = G_LOAD %0 :: (load 12, addrspace 1, align 16)
$vgpr0_vgpr1_vgpr2 = COPY %1
...
---
# 32-byte <8 x s32> load from addrspace 4 through an SGPR pointer. The check
# lines expect the load to be left unchanged.
# NOTE(review): these checks use the CHECK prefix while the neighbouring tests
# use SI/VI; confirm CHECK is enabled by a RUN line, otherwise they are never
# verified and should be regenerated with the active prefixes.
name: test_load_constant_v8i32
body: |
bb.0:
liveins: $sgpr0_sgpr1
; CHECK-LABEL: name: test_load_constant_v8i32
; CHECK: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
; CHECK: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[LOAD]](<8 x s32>)
%0:_(p4) = COPY $sgpr0_sgpr1
%1:_(<8 x s32>) = G_LOAD %0 :: (load 32, addrspace 4)
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %1
...
---
# 64-byte <16 x s32> load from addrspace 4 through an SGPR pointer. The check
# lines expect the load to be left unchanged.
# NOTE(review): these checks use the CHECK prefix while the neighbouring tests
# use SI/VI; confirm CHECK is enabled by a RUN line, otherwise they are never
# verified and should be regenerated with the active prefixes.
name: test_load_constant_v16i32
body: |
bb.0:
liveins: $sgpr0_sgpr1
; CHECK-LABEL: name: test_load_constant_v16i32
; CHECK: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, addrspace 4)
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[LOAD]](<16 x s32>)
%0:_(p4) = COPY $sgpr0_sgpr1
%1:_(<16 x s32>) = G_LOAD %0 :: (load 64, addrspace 4)
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1
...
---
# 3-byte <3 x s8> global load with 4-byte alignment. The SI and VI check lines
# are identical: the result type is expected to be widened to <4 x s8> while
# the memory access stays 3 bytes, and the original <3 x s8> value is
# recovered with G_EXTRACT at bit offset 0.
name: test_load_global_v3s8_align4
body: |
bb.0:
liveins: $vgpr0_vgpr1
; SI-LABEL: name: test_load_global_v3s8_align4
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1)
; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s8>)
; VI-LABEL: name: test_load_global_v3s8_align4
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1)
; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s8>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<3 x s8>) = G_LOAD %0 :: (load 3, align 4, addrspace 1)
S_NOP 0, implicit %1
...

View File

@ -143,12 +143,7 @@ body: |
; SI-LABEL: name: test_store_global_v3s32
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store 8, align 4, addrspace 1)
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
; SI: G_STORE [[EXTRACT1]](s32), [[GEP]](p1) :: (store 4, addrspace 1)
; SI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 4, addrspace 1)
; VI-LABEL: name: test_store_global_v3s32
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
@ -327,12 +322,7 @@ body: |
; SI-LABEL: name: test_store_global_96
; SI: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
; SI: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4
; SI: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0
; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64
; SI: G_STORE [[EXTRACT]](s64), [[COPY1]](p1) :: (store 8, align 16, addrspace 1)
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY1]], [[C]](s64)
; SI: G_STORE [[EXTRACT1]](s32), [[GEP]](p1) :: (store 4, align 8, addrspace 1)
; SI: G_STORE [[COPY]](s96), [[COPY1]](p1) :: (store 12, align 16, addrspace 1)
; VI-LABEL: name: test_store_global_96
; VI: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
; VI: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4
@ -392,14 +382,46 @@ body: |
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF
; SI: [[DEF1:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF1]], [[DEF]](<3 x s8>), 0
; SI: G_STORE [[INSERT]](<4 x s8>), [[COPY]](p1) :: (store 3, align 4, addrspace 1)
; SI: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[DEF1]](<4 x s8>)
; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[DEF]](<3 x s8>), 0
; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT]](<4 x s16>)
; SI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<4 x s8>)
; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
; SI: G_STORE [[ANYEXT1]](s32), [[COPY]](p1) :: (store 1, align 4, addrspace 1)
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
; SI: G_STORE [[ANYEXT2]](s32), [[GEP]](p1) :: (store 1, addrspace 1)
; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
; SI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
; SI: G_STORE [[ANYEXT3]](s32), [[GEP1]](p1) :: (store 1, align 2, addrspace 1)
; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; SI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64)
; SI: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
; SI: G_STORE [[ANYEXT4]](s32), [[GEP2]](p1) :: (store 1, addrspace 1)
; VI-LABEL: name: test_store_global_v3s8_align4
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF
; VI: [[DEF1:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF1]], [[DEF]](<3 x s8>), 0
; VI: G_STORE [[INSERT]](<4 x s8>), [[COPY]](p1) :: (store 3, align 4, addrspace 1)
; VI: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[DEF1]](<4 x s8>)
; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[DEF]](<3 x s8>), 0
; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT]](<4 x s16>)
; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<4 x s8>)
; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
; VI: G_STORE [[ANYEXT1]](s32), [[COPY]](p1) :: (store 1, align 4, addrspace 1)
; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
; VI: G_STORE [[ANYEXT2]](s32), [[GEP]](p1) :: (store 1, addrspace 1)
; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
; VI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
; VI: G_STORE [[ANYEXT3]](s32), [[GEP1]](p1) :: (store 1, align 2, addrspace 1)
; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; VI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64)
; VI: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
; VI: G_STORE [[ANYEXT4]](s32), [[GEP2]](p1) :: (store 1, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<3 x s8>) = G_IMPLICIT_DEF
G_STORE %1, %0 :: (store 3, addrspace 1, align 4)