forked from OSchip/llvm-project
AMDGPU/GlobalISel: First pass at attempting to legalize load/stores
There's still a lot more to do, but this handles decomposing due to alignment. I've gotten it to the point where nothing crashes or sends the legalizer into an infinite loop. llvm-svn: 371533
This commit is contained in:
parent
9b23df63ec
commit
c0ceca5883
|
@ -76,6 +76,31 @@ static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) {
|
|||
};
|
||||
}
|
||||
|
||||
// Increase the number of vector elements to reach the next multiple of 32-bit
|
||||
// type.
|
||||
static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) {
|
||||
return [=](const LegalityQuery &Query) {
|
||||
const LLT Ty = Query.Types[TypeIdx];
|
||||
|
||||
const LLT EltTy = Ty.getElementType();
|
||||
const int Size = Ty.getSizeInBits();
|
||||
const int EltSize = EltTy.getSizeInBits();
|
||||
const int NextMul32 = (Size + 31) / 32;
|
||||
|
||||
assert(EltSize < 32);
|
||||
|
||||
const int NewNumElts = (32 * NextMul32 + EltSize - 1) / EltSize;
|
||||
return std::make_pair(TypeIdx, LLT::vector(NewNumElts, EltTy));
|
||||
};
|
||||
}
|
||||
|
||||
/// Returns a predicate that holds when the type at \p TypeIdx is a vector
/// whose total size in bits is strictly less than \p Size.
static LegalityPredicate vectorSmallerThan(unsigned TypeIdx, unsigned Size) {
  return [=](const LegalityQuery &Query) -> bool {
    const LLT Ty = Query.Types[TypeIdx];

    // Scalars and pointers never match.
    if (!Ty.isVector())
      return false;

    return Ty.getSizeInBits() < Size;
  };
}
|
||||
|
||||
static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size) {
|
||||
return [=](const LegalityQuery &Query) {
|
||||
const LLT QueryTy = Query.Types[TypeIdx];
|
||||
|
@ -112,6 +137,14 @@ static LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT Type) {
|
|||
};
|
||||
}
|
||||
|
||||
/// Returns a predicate that holds when the type at \p TypeIdx is a non-vector
/// type wider than 32 bits and the first memory operand accesses fewer bits
/// than that type holds.
static LegalityPredicate isWideScalarTruncStore(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) -> bool {
    const LLT ValTy = Query.Types[TypeIdx];
    if (ValTy.isVector())
      return false;

    const unsigned ValBits = ValTy.getSizeInBits();

    // The memory descriptor is only consulted for values wider than a dword.
    return ValBits > 32 && Query.MMODescrs[0].SizeInBits < ValBits;
  };
}
|
||||
|
||||
AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
||||
const GCNTargetMachine &TM)
|
||||
: ST(ST_) {
|
||||
|
@ -126,6 +159,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
|||
const LLT S16 = LLT::scalar(16);
|
||||
const LLT S32 = LLT::scalar(32);
|
||||
const LLT S64 = LLT::scalar(64);
|
||||
const LLT S96 = LLT::scalar(96);
|
||||
const LLT S128 = LLT::scalar(128);
|
||||
const LLT S256 = LLT::scalar(256);
|
||||
const LLT S512 = LLT::scalar(512);
|
||||
|
@ -246,7 +280,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
|||
.legalForCartesianProduct({S64, V2S32, V4S16})
|
||||
.legalForCartesianProduct({V2S64, V4S32})
|
||||
// Don't worry about the size constraint.
|
||||
.legalIf(all(isPointer(0), isPointer(1)));
|
||||
.legalIf(all(isPointer(0), isPointer(1)))
|
||||
// FIXME: Testing hack
|
||||
.legalForCartesianProduct({S16, LLT::vector(2, 8), });
|
||||
|
||||
getActionDefinitionsBuilder(G_FCONSTANT)
|
||||
.legalFor({S32, S64, S16})
|
||||
|
@ -358,6 +394,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
|||
getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
|
||||
.legalFor({{S64, S32}, {S32, S16}, {S64, S16},
|
||||
{S32, S1}, {S64, S1}, {S16, S1},
|
||||
{S96, S32},
|
||||
// FIXME: Hack
|
||||
{S64, LLT::scalar(33)},
|
||||
{S32, S8}, {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}})
|
||||
|
@ -523,79 +560,229 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
|||
// TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
|
||||
// handle some operations by just promoting the register during
|
||||
// selection. There are also d16 loads on GFX9+ which preserve the high bits.
|
||||
getActionDefinitionsBuilder({G_LOAD, G_STORE})
|
||||
.narrowScalarIf([](const LegalityQuery &Query) {
|
||||
unsigned Size = Query.Types[0].getSizeInBits();
|
||||
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
|
||||
return (Size > 32 && MemSize < Size);
|
||||
},
|
||||
[](const LegalityQuery &Query) {
|
||||
return std::make_pair(0, LLT::scalar(32));
|
||||
})
|
||||
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
|
||||
.fewerElementsIf([=](const LegalityQuery &Query) {
|
||||
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
|
||||
return (MemSize == 96) &&
|
||||
Query.Types[0].isVector() &&
|
||||
!ST.hasDwordx3LoadStores();
|
||||
},
|
||||
[=](const LegalityQuery &Query) {
|
||||
return std::make_pair(0, V2S32);
|
||||
})
|
||||
.legalIf([=](const LegalityQuery &Query) {
|
||||
const LLT &Ty0 = Query.Types[0];
|
||||
auto maxSizeForAddrSpace = [this](unsigned AS) -> unsigned {
|
||||
switch (AS) {
|
||||
// FIXME: Private element size.
|
||||
case AMDGPUAS::PRIVATE_ADDRESS:
|
||||
return 32;
|
||||
// FIXME: Check subtarget
|
||||
case AMDGPUAS::LOCAL_ADDRESS:
|
||||
return ST.useDS128() ? 128 : 64;
|
||||
|
||||
unsigned Size = Ty0.getSizeInBits();
|
||||
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
|
||||
if (Size < 32 || (Size > 32 && MemSize < Size))
|
||||
return false;
|
||||
// Treat constant and global as identical. SMRD loads are sometimes usable
|
||||
// for global loads (ideally constant address space should be eliminated)
|
||||
// depending on the context. Legality cannot be context dependent, but
|
||||
// RegBankSelect can split the load as necessary depending on the pointer
|
||||
// register bank/uniformity and if the memory is invariant or not written in
|
||||
// a kernel.
|
||||
case AMDGPUAS::CONSTANT_ADDRESS:
|
||||
case AMDGPUAS::GLOBAL_ADDRESS:
|
||||
return 512;
|
||||
default:
|
||||
return 128;
|
||||
}
|
||||
};
|
||||
|
||||
if (Ty0.isVector() && Size != MemSize)
|
||||
return false;
|
||||
const auto needToSplitLoad = [=](const LegalityQuery &Query) -> bool {
|
||||
const LLT DstTy = Query.Types[0];
|
||||
|
||||
// TODO: Decompose private loads into 4-byte components.
|
||||
// TODO: Illegal flat loads on SI
|
||||
switch (MemSize) {
|
||||
case 8:
|
||||
case 16:
|
||||
return Size == 32;
|
||||
case 32:
|
||||
case 64:
|
||||
case 128:
|
||||
return true;
|
||||
// Split vector extloads.
|
||||
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
|
||||
if (DstTy.isVector() && DstTy.getSizeInBits() > MemSize)
|
||||
return true;
|
||||
|
||||
case 96:
|
||||
return ST.hasDwordx3LoadStores();
|
||||
const LLT PtrTy = Query.Types[1];
|
||||
unsigned AS = PtrTy.getAddressSpace();
|
||||
if (MemSize > maxSizeForAddrSpace(AS))
|
||||
return true;
|
||||
|
||||
case 256:
|
||||
case 512:
|
||||
// TODO: Possibly support loads of i256 and i512 . This will require
|
||||
// adding i256 and i512 types to MVT in order for to be able to use
|
||||
// TableGen.
|
||||
// TODO: Add support for other vector types, this will require
|
||||
// defining more value mappings for the new types.
|
||||
return Ty0.isVector() && (Ty0.getScalarType().getSizeInBits() == 32 ||
|
||||
Ty0.getScalarType().getSizeInBits() == 64);
|
||||
// Catch weird sized loads that don't evenly divide into the access sizes
|
||||
// TODO: May be able to widen depending on alignment etc.
|
||||
unsigned NumRegs = MemSize / 32;
|
||||
if (NumRegs == 3 && !ST.hasDwordx3LoadStores())
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
})
|
||||
.clampScalar(0, S32, S64);
|
||||
unsigned Align = Query.MMODescrs[0].AlignInBits;
|
||||
if (Align < MemSize) {
|
||||
const SITargetLowering *TLI = ST.getTargetLowering();
|
||||
return !TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8);
|
||||
}
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32;
|
||||
unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16;
|
||||
unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8;
|
||||
|
||||
// TODO: Refine based on subtargets which support unaligned access or 128-bit
|
||||
// LDS
|
||||
// TODO: Unsupported flat for SI.
|
||||
|
||||
for (unsigned Op : {G_LOAD, G_STORE}) {
|
||||
const bool IsStore = Op == G_STORE;
|
||||
|
||||
auto &Actions = getActionDefinitionsBuilder(Op);
|
||||
// Whitelist the common cases.
|
||||
// TODO: Pointer loads
|
||||
// TODO: Wide constant loads
|
||||
// TODO: Only CI+ has 3x loads
|
||||
// TODO: Loads to s16 on gfx9
|
||||
Actions.legalForTypesWithMemDesc({{S32, GlobalPtr, 32, GlobalAlign32},
|
||||
{V2S32, GlobalPtr, 64, GlobalAlign32},
|
||||
{V3S32, GlobalPtr, 96, GlobalAlign32},
|
||||
{S96, GlobalPtr, 96, GlobalAlign32},
|
||||
{V4S32, GlobalPtr, 128, GlobalAlign32},
|
||||
{S128, GlobalPtr, 128, GlobalAlign32},
|
||||
{S64, GlobalPtr, 64, GlobalAlign32},
|
||||
{V2S64, GlobalPtr, 128, GlobalAlign32},
|
||||
{V2S16, GlobalPtr, 32, GlobalAlign32},
|
||||
{S32, GlobalPtr, 8, GlobalAlign8},
|
||||
{S32, GlobalPtr, 16, GlobalAlign16},
|
||||
|
||||
{S32, LocalPtr, 32, 32},
|
||||
{S64, LocalPtr, 64, 32},
|
||||
{V2S32, LocalPtr, 64, 32},
|
||||
{S32, LocalPtr, 8, 8},
|
||||
{S32, LocalPtr, 16, 16},
|
||||
{V2S16, LocalPtr, 32, 32},
|
||||
|
||||
{S32, PrivatePtr, 32, 32},
|
||||
{S32, PrivatePtr, 8, 8},
|
||||
{S32, PrivatePtr, 16, 16},
|
||||
{V2S16, PrivatePtr, 32, 32},
|
||||
|
||||
{S32, FlatPtr, 32, GlobalAlign32},
|
||||
{S32, FlatPtr, 16, GlobalAlign16},
|
||||
{S32, FlatPtr, 8, GlobalAlign8},
|
||||
{V2S16, FlatPtr, 32, GlobalAlign32},
|
||||
|
||||
{S32, ConstantPtr, 32, GlobalAlign32},
|
||||
{V2S32, ConstantPtr, 64, GlobalAlign32},
|
||||
{V3S32, ConstantPtr, 96, GlobalAlign32},
|
||||
{V4S32, ConstantPtr, 128, GlobalAlign32},
|
||||
{S64, ConstantPtr, 64, GlobalAlign32},
|
||||
{S128, ConstantPtr, 128, GlobalAlign32},
|
||||
{V2S32, ConstantPtr, 32, GlobalAlign32}});
|
||||
Actions
|
||||
.narrowScalarIf(
|
||||
[=](const LegalityQuery &Query) -> bool {
|
||||
return !Query.Types[0].isVector() && needToSplitLoad(Query);
|
||||
},
|
||||
[=](const LegalityQuery &Query) -> std::pair<unsigned, LLT> {
|
||||
const LLT DstTy = Query.Types[0];
|
||||
const LLT PtrTy = Query.Types[1];
|
||||
|
||||
const unsigned DstSize = DstTy.getSizeInBits();
|
||||
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
|
||||
|
||||
// Split extloads.
|
||||
if (DstSize > MemSize)
|
||||
return std::make_pair(0, LLT::scalar(MemSize));
|
||||
|
||||
if (DstSize > 32 && (DstSize % 32 != 0)) {
|
||||
// FIXME: Need a way to specify non-extload of larger size if
|
||||
// suitably aligned.
|
||||
return std::make_pair(0, LLT::scalar(32 * (DstSize / 32)));
|
||||
}
|
||||
|
||||
unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace());
|
||||
if (MemSize > MaxSize)
|
||||
return std::make_pair(0, LLT::scalar(MaxSize));
|
||||
|
||||
unsigned Align = Query.MMODescrs[0].AlignInBits;
|
||||
return std::make_pair(0, LLT::scalar(Align));
|
||||
})
|
||||
.fewerElementsIf(
|
||||
[=](const LegalityQuery &Query) -> bool {
|
||||
return Query.Types[0].isVector() && needToSplitLoad(Query);
|
||||
},
|
||||
[=](const LegalityQuery &Query) -> std::pair<unsigned, LLT> {
|
||||
const LLT DstTy = Query.Types[0];
|
||||
const LLT PtrTy = Query.Types[1];
|
||||
|
||||
LLT EltTy = DstTy.getElementType();
|
||||
unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace());
|
||||
|
||||
// Split if it's too large for the address space.
|
||||
if (Query.MMODescrs[0].SizeInBits > MaxSize) {
|
||||
unsigned NumElts = DstTy.getNumElements();
|
||||
unsigned NumPieces = Query.MMODescrs[0].SizeInBits / MaxSize;
|
||||
|
||||
// FIXME: Refine when odd breakdowns handled
|
||||
// The scalars will need to be re-legalized.
|
||||
if (NumPieces == 1 || NumPieces >= NumElts ||
|
||||
NumElts % NumPieces != 0)
|
||||
return std::make_pair(0, EltTy);
|
||||
|
||||
return std::make_pair(0,
|
||||
LLT::vector(NumElts / NumPieces, EltTy));
|
||||
}
|
||||
|
||||
// Need to split because of alignment.
|
||||
unsigned Align = Query.MMODescrs[0].AlignInBits;
|
||||
unsigned EltSize = EltTy.getSizeInBits();
|
||||
if (EltSize > Align &&
|
||||
(EltSize / Align < DstTy.getNumElements())) {
|
||||
return std::make_pair(0, LLT::vector(EltSize / Align, EltTy));
|
||||
}
|
||||
|
||||
// May need relegalization for the scalars.
|
||||
return std::make_pair(0, EltTy);
|
||||
})
|
||||
.minScalar(0, S32);
|
||||
|
||||
if (IsStore)
|
||||
Actions.narrowScalarIf(isWideScalarTruncStore(0), changeTo(0, S32));
|
||||
|
||||
// TODO: Need a bitcast lower option?
|
||||
Actions
|
||||
.legalIf([=](const LegalityQuery &Query) {
|
||||
const LLT Ty0 = Query.Types[0];
|
||||
unsigned Size = Ty0.getSizeInBits();
|
||||
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
|
||||
unsigned Align = Query.MMODescrs[0].AlignInBits;
|
||||
|
||||
// No extending vector loads.
|
||||
if (Size > MemSize && Ty0.isVector())
|
||||
return false;
|
||||
|
||||
// FIXME: Widening store from alignment not valid.
|
||||
if (MemSize < Size)
|
||||
MemSize = std::max(MemSize, Align);
|
||||
|
||||
switch (MemSize) {
|
||||
case 8:
|
||||
case 16:
|
||||
return Size == 32;
|
||||
case 32:
|
||||
case 64:
|
||||
case 128:
|
||||
return true;
|
||||
case 96:
|
||||
return ST.hasDwordx3LoadStores();
|
||||
case 256:
|
||||
case 512:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
})
|
||||
.widenScalarToNextPow2(0)
|
||||
// TODO: v3s32->v4s32 with alignment
|
||||
.moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0));
|
||||
}
|
||||
|
||||
// FIXME: Handle alignment requirements.
|
||||
auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
|
||||
.legalForTypesWithMemDesc({
|
||||
{S32, GlobalPtr, 8, 8},
|
||||
{S32, GlobalPtr, 16, 8},
|
||||
{S32, LocalPtr, 8, 8},
|
||||
{S32, LocalPtr, 16, 8},
|
||||
{S32, PrivatePtr, 8, 8},
|
||||
{S32, PrivatePtr, 16, 8}});
|
||||
.legalForTypesWithMemDesc({{S32, GlobalPtr, 8, 8},
|
||||
{S32, GlobalPtr, 16, 2 * 8},
|
||||
{S32, LocalPtr, 8, 8},
|
||||
{S32, LocalPtr, 16, 16},
|
||||
{S32, PrivatePtr, 8, 8},
|
||||
{S32, PrivatePtr, 16, 16}});
|
||||
if (ST.hasFlatAddressSpace()) {
|
||||
ExtLoads.legalForTypesWithMemDesc({{S32, FlatPtr, 8, 8},
|
||||
{S32, FlatPtr, 16, 8}});
|
||||
ExtLoads.legalForTypesWithMemDesc(
|
||||
{{S32, FlatPtr, 8, 8}, {S32, FlatPtr, 16, 16}});
|
||||
}
|
||||
|
||||
ExtLoads.clampScalar(0, S32, S32)
|
||||
|
|
|
@ -1230,21 +1230,12 @@ bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
|
|||
return true;
|
||||
}
|
||||
|
||||
bool SITargetLowering::allowsMisalignedMemoryAccesses(
|
||||
EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
|
||||
bool *IsFast) const {
|
||||
bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
|
||||
unsigned Size, unsigned AddrSpace, unsigned Align,
|
||||
MachineMemOperand::Flags Flags, bool *IsFast) const {
|
||||
if (IsFast)
|
||||
*IsFast = false;
|
||||
|
||||
// TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
|
||||
// which isn't a simple VT.
|
||||
// Until MVT is extended to handle this, simply check for the size and
|
||||
// rely on the condition below: allow accesses if the size is a multiple of 4.
|
||||
if (VT == MVT::Other || (VT != MVT::Other && VT.getSizeInBits() > 1024 &&
|
||||
VT.getStoreSize() > 16)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
|
||||
AddrSpace == AMDGPUAS::REGION_ADDRESS) {
|
||||
// ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
|
||||
|
@ -1283,7 +1274,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
|
|||
}
|
||||
|
||||
// Smaller than dword value must be aligned.
|
||||
if (VT.bitsLT(MVT::i32))
|
||||
if (Size < 32)
|
||||
return false;
|
||||
|
||||
// 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
|
||||
|
@ -1292,7 +1283,26 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
|
|||
if (IsFast)
|
||||
*IsFast = true;
|
||||
|
||||
return VT.bitsGT(MVT::i32) && Align % 4 == 0;
|
||||
return Size >= 32 && Align >= 4;
|
||||
}
|
||||
|
||||
/// EVT-based entry point for the misaligned-access query. Rejects value types
/// it cannot reason about, then forwards the type's size in bits to
/// allowsMisalignedMemoryAccessesImpl.
bool SITargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
    bool *IsFast) const {
  // Default the optional out-parameter so the early exits below report "not
  // fast".
  if (IsFast)
    *IsFast = false;

  // TODO: v3i32 arguably should allow unaligned accesses on CI with
  // DS_READ_B96, which isn't a simple VT. Until MVT is extended to handle
  // this, only the size is checked here and the rest of the decision is left
  // to allowsMisalignedMemoryAccessesImpl.
  if (VT == MVT::Other)
    return false;

  if (VT.getSizeInBits() > 1024 && VT.getStoreSize() > 16)
    return false;

  return allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace,
                                            Align, Flags, IsFast);
}
|
||||
|
||||
EVT SITargetLowering::getOptimalMemOpType(
|
||||
|
|
|
@ -238,6 +238,11 @@ public:
|
|||
bool canMergeStoresTo(unsigned AS, EVT MemVT,
|
||||
const SelectionDAG &DAG) const override;
|
||||
|
||||
/// Size-based form of allowsMisalignedMemoryAccesses: takes the access width
/// as a plain bit count instead of an EVT. The EVT overload forwards
/// VT.getSizeInBits() here, and the GlobalISel legalizer calls this directly
/// with a memory size in bits.
///
/// \p Size is the access size in bits. \p Align appears to be in bytes (the
/// legalizer passes AlignInBits / 8) -- confirm against other callers.
/// \p IsFast may be null; when non-null it is written by the implementation.
bool allowsMisalignedMemoryAccessesImpl(
|
||||
unsigned Size, unsigned AS, unsigned Align,
|
||||
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
|
||||
bool *IsFast = nullptr) const;
|
||||
|
||||
bool allowsMisalignedMemoryAccesses(
|
||||
EVT VT, unsigned AS, unsigned Align,
|
||||
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
|
||||
|
|
|
@ -0,0 +1,229 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=hawaii -mattr=+enable-ds128 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7-DS128 %s
|
||||
|
||||
---
|
||||
|
||||
name: load_local_v4s32_align16
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_local_v4s32_align16
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, addrspace 3)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
|
||||
; GFX7-LABEL: name: load_local_v4s32_align16
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
|
||||
; GFX7-DS128-LABEL: name: load_local_v4s32_align16
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[DS_READ_B128_:%[0-9]+]]:vreg_128 = DS_READ_B128 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 16, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_]]
|
||||
; GFX9-LABEL: name: load_local_v4s32_align16
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, addrspace 3)
|
||||
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 16, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_local_v4s32_align_4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_local_v4s32_align_4
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
|
||||
; GFX7-LABEL: name: load_local_v4s32_align_4
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
|
||||
; GFX7-DS128-LABEL: name: load_local_v4s32_align_4
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
|
||||
; GFX9-LABEL: name: load_local_v4s32_align_4
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_local_v2s64
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_local_v2s64
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
|
||||
; GFX7-LABEL: name: load_local_v2s64
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
|
||||
; GFX7-DS128-LABEL: name: load_local_v2s64
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
|
||||
; GFX9-LABEL: name: load_local_v2s64
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_local_v2p1
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_local_v2p1
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
|
||||
; GFX7-LABEL: name: load_local_v2p1
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
|
||||
; GFX7-DS128-LABEL: name: load_local_v2p1
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
|
||||
; GFX9-LABEL: name: load_local_v2p1
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_local_s128
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_local_s128
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
|
||||
; GFX7-LABEL: name: load_local_s128
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
|
||||
; GFX7-DS128-LABEL: name: load_local_s128
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
|
||||
; GFX9-LABEL: name: load_local_s128
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_local_v8s16
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_local_v8s16
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
|
||||
; GFX7-LABEL: name: load_local_v8s16
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
|
||||
; GFX7-DS128-LABEL: name: load_local_v8s16
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
|
||||
; GFX9-LABEL: name: load_local_v8s16
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
||||
...
|
|
@ -5,7 +5,6 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
|
||||
---
|
||||
|
||||
name: load_local_s32_from_4
|
||||
|
@ -29,6 +28,12 @@ body: |
|
|||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
|
||||
; GFX7: $vgpr0 = COPY [[DS_READ_B32_]]
|
||||
; GFX7-DS128-LABEL: name: load_local_s32_from_4
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0 = COPY [[DS_READ_B32_]]
|
||||
; GFX9-LABEL: name: load_local_s32_from_4
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
|
@ -63,6 +68,12 @@ body: |
|
|||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 2, addrspace 3)
|
||||
; GFX7: $vgpr0 = COPY [[DS_READ_U16_]]
|
||||
; GFX7-DS128-LABEL: name: load_local_s32_from_2
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 2, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U16_]]
|
||||
; GFX9-LABEL: name: load_local_s32_from_2
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
|
@ -101,6 +112,12 @@ body: |
|
|||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
|
||||
; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
|
||||
; GFX7-DS128-LABEL: name: load_local_s32_from_1
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U8_]]
|
||||
; GFX9-LABEL: name: load_local_s32_from_1
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
|
@ -135,6 +152,12 @@ body: |
|
|||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
|
||||
; GFX7-DS128-LABEL: name: load_local_v2s32
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
|
||||
; GFX9-LABEL: name: load_local_v2s32
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
|
@ -169,6 +192,12 @@ body: |
|
|||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
|
||||
; GFX7-DS128-LABEL: name: load_local_v2s32_align4
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
|
||||
; GFX9-LABEL: name: load_local_v2s32_align4
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
|
@ -182,107 +211,6 @@ body: |
|
|||
|
||||
---
|
||||
|
||||
name: load_local_v3s32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_local_v3s32
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: [[LOAD:%[0-9]+]]:{{vgpr|vreg_96}}(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
|
||||
; GFX7-LABEL: name: load_local_v3s32
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_96(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
|
||||
; GFX9-LABEL: name: load_local_v3s32
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_96(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
|
||||
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_local_v4s32_align16
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_local_v4s32_align16
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: $m0 = S_MOV_B32 -1
|
||||
; GFX6: [[DS_READ_B128_:%[0-9]+]]:vreg_128 = DS_READ_B128 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 16, addrspace 3)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_]]
|
||||
; GFX7-LABEL: name: load_local_v4s32_align16
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[DS_READ_B128_:%[0-9]+]]:vreg_128 = DS_READ_B128 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 16, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_]]
|
||||
; GFX9-LABEL: name: load_local_v4s32_align16
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 16, addrspace 3)
|
||||
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]]
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 16, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_local_v4s32_align_4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_local_v4s32_align_4
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: $m0 = S_MOV_B32 -1
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
|
||||
; GFX7-LABEL: name: load_local_v4s32_align_4
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
|
||||
; GFX9-LABEL: name: load_local_v4s32_align_4
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_local_s64
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
@ -304,6 +232,12 @@ body: |
|
|||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
|
||||
; GFX7-DS128-LABEL: name: load_local_s64
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
|
||||
; GFX9-LABEL: name: load_local_s64
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
|
@ -338,6 +272,12 @@ body: |
|
|||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
|
||||
; GFX7-DS128-LABEL: name: load_local_s64_align4
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
|
||||
; GFX9-LABEL: name: load_local_s64_align4
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
|
@ -351,141 +291,6 @@ body: |
|
|||
|
||||
---
|
||||
|
||||
name: load_local_v2s64
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_local_v2s64
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: $m0 = S_MOV_B32 -1
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
|
||||
; GFX7-LABEL: name: load_local_v2s64
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
|
||||
; GFX9-LABEL: name: load_local_v2s64
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_local_v2p1
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_local_v2p1
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: $m0 = S_MOV_B32 -1
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
|
||||
; GFX7-LABEL: name: load_local_v2p1
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
|
||||
; GFX9-LABEL: name: load_local_v2p1
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_local_s96
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_local_s96
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: [[LOAD:%[0-9]+]]:{{vgpr|vreg_96}}(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
|
||||
; GFX7-LABEL: name: load_local_s96
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
|
||||
; GFX9-LABEL: name: load_local_s96
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
|
||||
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_local_s128
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_local_s128
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: $m0 = S_MOV_B32 -1
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
|
||||
; GFX7-LABEL: name: load_local_s128
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
|
||||
; GFX9-LABEL: name: load_local_s128
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_local_p3_from_4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
@ -507,6 +312,12 @@ body: |
|
|||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
|
||||
; GFX7: $vgpr0 = COPY [[DS_READ_B32_]]
|
||||
; GFX7-DS128-LABEL: name: load_local_p3_from_4
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0 = COPY [[DS_READ_B32_]]
|
||||
; GFX9-LABEL: name: load_local_p3_from_4
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
|
@ -541,6 +352,12 @@ body: |
|
|||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
|
||||
; GFX7: $vgpr0 = COPY [[DS_READ_B32_]]
|
||||
; GFX7-DS128-LABEL: name: load_local_p5_from_4
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0 = COPY [[DS_READ_B32_]]
|
||||
; GFX9-LABEL: name: load_local_p5_from_4
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
|
@ -575,6 +392,12 @@ body: |
|
|||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
|
||||
; GFX7-DS128-LABEL: name: load_local_p1_align8
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
|
||||
; GFX9-LABEL: name: load_local_p1_align8
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
|
@ -609,6 +432,12 @@ body: |
|
|||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
|
||||
; GFX7-DS128-LABEL: name: load_local_p1_align4
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
|
||||
; GFX9-LABEL: name: load_local_p1_align4
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
|
@ -643,6 +472,12 @@ body: |
|
|||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
|
||||
; GFX7-DS128-LABEL: name: load_local_p999_from_8
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
|
||||
; GFX9-LABEL: name: load_local_p999_from_8
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
|
@ -677,6 +512,12 @@ body: |
|
|||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
|
||||
; GFX7-DS128-LABEL: name: load_local_v2p3
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
|
||||
; GFX9-LABEL: name: load_local_v2p3
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
|
@ -711,6 +552,12 @@ body: |
|
|||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
|
||||
; GFX7: $vgpr0 = COPY [[DS_READ_B32_]]
|
||||
; GFX7-DS128-LABEL: name: load_local_v2s16
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0 = COPY [[DS_READ_B32_]]
|
||||
; GFX9-LABEL: name: load_local_v2s16
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
|
@ -745,6 +592,12 @@ body: |
|
|||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
|
||||
; GFX7-DS128-LABEL: name: load_local_v4s16
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
|
||||
; GFX9-LABEL: name: load_local_v4s16
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
|
@ -777,40 +630,6 @@ body: |
|
|||
|
||||
# ...
|
||||
|
||||
---
|
||||
|
||||
name: load_local_v8s16
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_local_v8s16
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: $m0 = S_MOV_B32 -1
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
|
||||
; GFX7-LABEL: name: load_local_v8s16
|
||||
; GFX7: liveins: $vgpr0
|
||||
; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
|
||||
; GFX9-LABEL: name: load_local_v8s16
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
|
||||
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
################################################################################
|
||||
### Stress addressing modes
|
||||
################################################################################
|
||||
|
@ -840,6 +659,12 @@ body: |
|
|||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
|
||||
; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
|
||||
; GFX7-DS128-LABEL: name: load_local_s32_from_1_gep_65535
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U8_]]
|
||||
; GFX9-LABEL: name: load_local_s32_from_1_gep_65535
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
|
@ -925,6 +750,14 @@ body: |
|
|||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
|
||||
; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
|
||||
; GFX7-DS128-LABEL: name: load_local_s32_from_1_gep_65536
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX7-DS128: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec
|
||||
; GFX7-DS128: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U8_]]
|
||||
; GFX9-LABEL: name: load_local_s32_from_1_gep_65536
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
|
@ -967,6 +800,14 @@ body: |
|
|||
; GFX7: $m0 = S_MOV_B32 -1
|
||||
; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
|
||||
; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
|
||||
; GFX7-DS128-LABEL: name: load_local_s32_from_1_gep_m1
|
||||
; GFX7-DS128: liveins: $vgpr0
|
||||
; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX7-DS128: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
|
||||
; GFX7-DS128: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX7-DS128: $m0 = S_MOV_B32 -1
|
||||
; GFX7-DS128: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
|
||||
; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U8_]]
|
||||
; GFX9-LABEL: name: load_local_s32_from_1_gep_m1
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
|
|
|
@ -97,193 +97,6 @@ body: |
|
|||
|
||||
---
|
||||
|
||||
name: load_private_v2s32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
scratchWaveOffsetReg: $sgpr4
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORDX2_]]
|
||||
; GFX6-LABEL: name: load_private_v2s32
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 5)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_v2s32
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 5)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 5)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_private_v4s32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
scratchWaveOffsetReg: $sgpr4
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_private_v4s32
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 5)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_v4s32
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 5)
|
||||
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 5)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_private_s64
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
scratchWaveOffsetReg: $sgpr4
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_private_s64
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
|
||||
; GFX9-LABEL: name: load_private_s64
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 5)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_private_v2s64
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
scratchWaveOffsetReg: $sgpr4
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_private_v2s64
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
|
||||
; GFX9-LABEL: name: load_private_v2s64
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
|
||||
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 5)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_private_v2p1
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
scratchWaveOffsetReg: $sgpr4
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_private_v2p1
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
|
||||
; GFX9-LABEL: name: load_private_v2p1
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
|
||||
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 5)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_private_s128
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
scratchWaveOffsetReg: $sgpr4
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_private_s128
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
|
||||
; GFX9-LABEL: name: load_private_s128
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
|
||||
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 5)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_private_p3_from_4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
@ -342,68 +155,6 @@ body: |
|
|||
|
||||
---
|
||||
|
||||
name: load_private_p999_from_8
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
scratchWaveOffsetReg: $sgpr4
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_private_p999_from_8
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
|
||||
; GFX9-LABEL: name: load_private_p999_from_8
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(p999) = G_LOAD %0 :: (load 8, align 8, addrspace 5)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_private_v2p3
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
scratchWaveOffsetReg: $sgpr4
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_private_v2p3
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
|
||||
; GFX9-LABEL: name: load_private_v2p3
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 5)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_private_v2s16
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
@ -433,85 +184,6 @@ body: |
|
|||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: load_private_v4s16
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_private_v4s16
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
|
||||
; GFX9-LABEL: name: load_private_v4s16
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 5)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
# ---
|
||||
|
||||
# name: load_private_v6s16
|
||||
# legalized: true
|
||||
# regBankSelected: true
|
||||
# tracksRegLiveness: true
|
||||
# machineFunctionInfo:
|
||||
# scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
# scratchWaveOffsetReg: $sgpr4
|
||||
# stackPtrOffsetReg: $sgpr32
|
||||
|
||||
# body: |
|
||||
# bb.0:
|
||||
# liveins: $vgpr0
|
||||
|
||||
# %0:vgpr(p5) = COPY $vgpr0
|
||||
# %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load 12, align 4, addrspace 5)
|
||||
# $vgpr0_vgpr1_vgpr2 = COPY %1
|
||||
|
||||
# ...
|
||||
|
||||
---
|
||||
|
||||
name: load_private_v8s16
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
scratchWaveOffsetReg: $sgpr4
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX6-LABEL: name: load_private_v8s16
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
|
||||
; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
|
||||
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
|
||||
; GFX9-LABEL: name: load_private_v8s16
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
|
||||
; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
|
||||
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 5)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
################################################################################
|
||||
### Stress addressing modes
|
||||
################################################################################
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,443 +0,0 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
|
||||
|
||||
---
|
||||
name: test_load_global_i32
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; SI-LABEL: name: test_load_global_i32
|
||||
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
|
||||
; SI: $vgpr0 = COPY [[LOAD]](s32)
|
||||
; VI-LABEL: name: test_load_global_i32
|
||||
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
|
||||
; VI: $vgpr0 = COPY [[LOAD]](s32)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(s32) = G_LOAD %0 :: (load 4, addrspace 1)
|
||||
$vgpr0 = COPY %1
|
||||
...
|
||||
|
||||
---
|
||||
name: test_load_global_i64
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; SI-LABEL: name: test_load_global_i64
|
||||
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
|
||||
; SI: $vgpr0 = COPY [[LOAD]](s32)
|
||||
; VI-LABEL: name: test_load_global_i64
|
||||
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
|
||||
; VI: $vgpr0 = COPY [[LOAD]](s32)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(s32) = G_LOAD %0 :: (load 4, addrspace 1)
|
||||
$vgpr0 = COPY %1
|
||||
...
|
||||
|
||||
---
|
||||
name: test_load_global_p1
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; SI-LABEL: name: test_load_global_p1
|
||||
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; SI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
|
||||
; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
|
||||
; VI-LABEL: name: test_load_global_p1
|
||||
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
|
||||
; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(p1) = G_LOAD %0 :: (load 8, addrspace 1)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
...
|
||||
|
||||
---
|
||||
name: test_load_global_p4
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; SI-LABEL: name: test_load_global_p4
|
||||
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; SI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
|
||||
; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
|
||||
; VI-LABEL: name: test_load_global_p4
|
||||
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
|
||||
; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(p4) = G_LOAD %0 :: (load 8, addrspace 1)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
...
|
||||
|
||||
---
|
||||
name: test_load_global_p3
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; SI-LABEL: name: test_load_global_p3
|
||||
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; SI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
|
||||
; SI: $vgpr0 = COPY [[LOAD]](p3)
|
||||
; VI-LABEL: name: test_load_global_p3
|
||||
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
|
||||
; VI: $vgpr0 = COPY [[LOAD]](p3)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(p3) = G_LOAD %0 :: (load 4, addrspace 1)
|
||||
$vgpr0 = COPY %1
|
||||
...
|
||||
|
||||
---
|
||||
name: test_load_global_v2s32
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; SI-LABEL: name: test_load_global_v2s32
|
||||
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
|
||||
; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
|
||||
; VI-LABEL: name: test_load_global_v2s32
|
||||
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
|
||||
; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(<2 x s32>) = G_LOAD %0 :: (load 8, addrspace 1)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: test_load_global_v2s16
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; SI-LABEL: name: test_load_global_v2s16
|
||||
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
|
||||
; SI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
|
||||
; VI-LABEL: name: test_load_global_v2s16
|
||||
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
|
||||
; VI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(<2 x s16>) = G_LOAD %0 :: (load 4, addrspace 1)
|
||||
$vgpr0 = COPY %1
|
||||
...
|
||||
|
||||
---
|
||||
name: test_load_global_v3i32
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; SI-LABEL: name: test_load_global_v3i32
|
||||
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
|
||||
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
|
||||
; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 4, addrspace 1)
|
||||
; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
|
||||
; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
|
||||
; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
|
||||
; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
|
||||
; VI-LABEL: name: test_load_global_v3i32
|
||||
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
|
||||
; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 1)
|
||||
$vgpr0_vgpr1_vgpr2 = COPY %1
|
||||
...
|
||||
|
||||
---
|
||||
name: test_ext_load_global_s64_from_1_align1
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; SI-LABEL: name: test_ext_load_global_s64_from_1_align1
|
||||
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
|
||||
; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
|
||||
; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
|
||||
; VI-LABEL: name: test_ext_load_global_s64_from_1_align1
|
||||
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
|
||||
; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
|
||||
; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = G_LOAD %0 :: (load 1, addrspace 1, align 4)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
...
|
||||
|
||||
---
|
||||
name: test_ext_load_global_s64_from_2_align2
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; SI-LABEL: name: test_ext_load_global_s64_from_2_align2
|
||||
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
|
||||
; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
|
||||
; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
|
||||
; VI-LABEL: name: test_ext_load_global_s64_from_2_align2
|
||||
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
|
||||
; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
|
||||
; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = G_LOAD %0 :: (load 2, addrspace 1, align 4)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
...
|
||||
|
||||
---
|
||||
name: test_ext_load_global_s64_from_4_align4
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; SI-LABEL: name: test_ext_load_global_s64_from_4_align4
|
||||
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
|
||||
; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
|
||||
; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
|
||||
; VI-LABEL: name: test_ext_load_global_s64_from_4_align4
|
||||
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
|
||||
; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
|
||||
; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = G_LOAD %0 :: (load 4, addrspace 1, align 4)
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
...
|
||||
|
||||
---
|
||||
name: test_ext_load_global_s128_from_4_align4
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; SI-LABEL: name: test_ext_load_global_s128_from_4_align4
|
||||
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
|
||||
; SI: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s32)
|
||||
; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT]](s128)
|
||||
; VI-LABEL: name: test_ext_load_global_s128_from_4_align4
|
||||
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
|
||||
; VI: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s32)
|
||||
; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT]](s128)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(s128) = G_LOAD %0 :: (load 4, addrspace 1, align 4)
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: test_load_global_s96_align4
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; SI-LABEL: name: test_load_global_s96_align4
|
||||
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
|
||||
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
|
||||
; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 4, addrspace 1)
|
||||
; SI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
|
||||
; SI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0
|
||||
; SI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
|
||||
; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
|
||||
; VI-LABEL: name: test_load_global_s96_align4
|
||||
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
|
||||
; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(s96) = G_LOAD %0 :: (load 12, addrspace 1, align 4)
|
||||
$vgpr0_vgpr1_vgpr2 = COPY %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: test_load_global_s160_align4
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; SI-LABEL: name: test_load_global_s160_align4
|
||||
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
|
||||
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
|
||||
; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1)
|
||||
; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
|
||||
; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 4, addrspace 1)
|
||||
; SI: [[DEF:%[0-9]+]]:_(s160) = G_IMPLICIT_DEF
|
||||
; SI: [[INSERT:%[0-9]+]]:_(s160) = G_INSERT [[DEF]], [[LOAD]](s64), 0
|
||||
; SI: [[INSERT1:%[0-9]+]]:_(s160) = G_INSERT [[INSERT]], [[LOAD1]](s64), 64
|
||||
; SI: [[INSERT2:%[0-9]+]]:_(s160) = G_INSERT [[INSERT1]], [[LOAD2]](s32), 128
|
||||
; SI: S_NOP 0, implicit [[INSERT2]](s160)
|
||||
; VI-LABEL: name: test_load_global_s160_align4
|
||||
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
|
||||
; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
|
||||
; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1)
|
||||
; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
|
||||
; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 4, addrspace 1)
|
||||
; VI: [[DEF:%[0-9]+]]:_(s160) = G_IMPLICIT_DEF
|
||||
; VI: [[INSERT:%[0-9]+]]:_(s160) = G_INSERT [[DEF]], [[LOAD]](s64), 0
|
||||
; VI: [[INSERT1:%[0-9]+]]:_(s160) = G_INSERT [[INSERT]], [[LOAD1]](s64), 64
|
||||
; VI: [[INSERT2:%[0-9]+]]:_(s160) = G_INSERT [[INSERT1]], [[LOAD2]](s32), 128
|
||||
; VI: S_NOP 0, implicit [[INSERT2]](s160)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(s160) = G_LOAD %0 :: (load 20, addrspace 1, align 4)
|
||||
S_NOP 0, implicit %1
|
||||
...
|
||||
|
||||
---
|
||||
name: test_load_global_s224_align4
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; SI-LABEL: name: test_load_global_s224_align4
|
||||
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
|
||||
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
|
||||
; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1)
|
||||
; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
|
||||
; SI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p1) :: (load 8, align 4, addrspace 1)
|
||||
; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
|
||||
; SI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64)
|
||||
; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 4, addrspace 1)
|
||||
; SI: [[DEF:%[0-9]+]]:_(s224) = G_IMPLICIT_DEF
|
||||
; SI: [[INSERT:%[0-9]+]]:_(s224) = G_INSERT [[DEF]], [[LOAD]](s64), 0
|
||||
; SI: [[INSERT1:%[0-9]+]]:_(s224) = G_INSERT [[INSERT]], [[LOAD1]](s64), 64
|
||||
; SI: [[INSERT2:%[0-9]+]]:_(s224) = G_INSERT [[INSERT1]], [[LOAD2]](s64), 128
|
||||
; SI: [[INSERT3:%[0-9]+]]:_(s224) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 192
|
||||
; SI: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
|
||||
; SI: [[INSERT4:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[INSERT3]](s224), 0
|
||||
; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT4]](s256)
|
||||
; VI-LABEL: name: test_load_global_s224_align4
|
||||
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
|
||||
; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
|
||||
; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p1) :: (load 8, align 4, addrspace 1)
|
||||
; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
|
||||
; VI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p1) :: (load 8, align 4, addrspace 1)
|
||||
; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
|
||||
; VI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64)
|
||||
; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 4, addrspace 1)
|
||||
; VI: [[DEF:%[0-9]+]]:_(s224) = G_IMPLICIT_DEF
|
||||
; VI: [[INSERT:%[0-9]+]]:_(s224) = G_INSERT [[DEF]], [[LOAD]](s64), 0
|
||||
; VI: [[INSERT1:%[0-9]+]]:_(s224) = G_INSERT [[INSERT]], [[LOAD1]](s64), 64
|
||||
; VI: [[INSERT2:%[0-9]+]]:_(s224) = G_INSERT [[INSERT1]], [[LOAD2]](s64), 128
|
||||
; VI: [[INSERT3:%[0-9]+]]:_(s224) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 192
|
||||
; VI: [[DEF1:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
|
||||
; VI: [[INSERT4:%[0-9]+]]:_(s256) = G_INSERT [[DEF1]], [[INSERT3]](s224), 0
|
||||
; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT4]](s256)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(s224) = G_LOAD %0 :: (load 28, addrspace 1, align 4)
|
||||
|
||||
%2:_(s256) = G_IMPLICIT_DEF
|
||||
%3:_(s256) = G_INSERT %2, %1, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: test_load_global_v3s32
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; SI-LABEL: name: test_load_global_v3s32
|
||||
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 16, addrspace 1)
|
||||
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
|
||||
; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 4, align 8, addrspace 1)
|
||||
; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
|
||||
; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
|
||||
; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
|
||||
; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
|
||||
; VI-LABEL: name: test_load_global_v3s32
|
||||
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1)
|
||||
; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(<3 x s32>) = G_LOAD %0 :: (load 12, addrspace 1, align 16)
|
||||
$vgpr0_vgpr1_vgpr2 = COPY %1
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: test_load_constant_v8i32
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1
|
||||
|
||||
; CHECK-LABEL: name: test_load_global_v8i32
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
|
||||
; CHECK: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
|
||||
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[LOAD]](<8 x s32>)
|
||||
%0:_(p4) = COPY $sgpr0_sgpr1
|
||||
%1:_(<8 x s32>) = G_LOAD %0 :: (load 32, addrspace 4)
|
||||
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %1
|
||||
...
|
||||
|
||||
---
|
||||
name: test_load_constant_v16i32
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1
|
||||
|
||||
; CHECK-LABEL: name: test_load_global_v16i32
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
|
||||
; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, addrspace 4)
|
||||
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[LOAD]](<16 x s32>)
|
||||
%0:_(p4) = COPY $sgpr0_sgpr1
|
||||
%1:_(<16 x s32>) = G_LOAD %0 :: (load 64, addrspace 4)
|
||||
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1
|
||||
...
|
||||
|
||||
name: test_load_global_v3s8_align4
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; SI-LABEL: name: test_load_global_v3s8_align4
|
||||
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1)
|
||||
; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
|
||||
; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s8>)
|
||||
; VI-LABEL: name: test_load_global_v3s8_align4
|
||||
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1)
|
||||
; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0
|
||||
; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s8>)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(<3 x s8>) = G_LOAD %0 :: (load 3, align 4, addrspace 1)
|
||||
S_NOP 0, implicit %1
|
||||
...
|
|
@ -143,12 +143,7 @@ body: |
|
|||
; SI-LABEL: name: test_store_global_v3s32
|
||||
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
|
||||
; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
|
||||
; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
|
||||
; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store 8, align 4, addrspace 1)
|
||||
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
|
||||
; SI: G_STORE [[EXTRACT1]](s32), [[GEP]](p1) :: (store 4, addrspace 1)
|
||||
; SI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 4, addrspace 1)
|
||||
; VI-LABEL: name: test_store_global_v3s32
|
||||
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
|
||||
|
@ -327,12 +322,7 @@ body: |
|
|||
; SI-LABEL: name: test_store_global_96
|
||||
; SI: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
|
||||
; SI: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4
|
||||
; SI: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0
|
||||
; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64
|
||||
; SI: G_STORE [[EXTRACT]](s64), [[COPY1]](p1) :: (store 8, align 16, addrspace 1)
|
||||
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY1]], [[C]](s64)
|
||||
; SI: G_STORE [[EXTRACT1]](s32), [[GEP]](p1) :: (store 4, align 8, addrspace 1)
|
||||
; SI: G_STORE [[COPY]](s96), [[COPY1]](p1) :: (store 12, align 16, addrspace 1)
|
||||
; VI-LABEL: name: test_store_global_96
|
||||
; VI: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
|
||||
; VI: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4
|
||||
|
@ -392,14 +382,46 @@ body: |
|
|||
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; SI: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF
|
||||
; SI: [[DEF1:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
|
||||
; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF1]], [[DEF]](<3 x s8>), 0
|
||||
; SI: G_STORE [[INSERT]](<4 x s8>), [[COPY]](p1) :: (store 3, align 4, addrspace 1)
|
||||
; SI: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[DEF1]](<4 x s8>)
|
||||
; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[DEF]](<3 x s8>), 0
|
||||
; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT]](<4 x s16>)
|
||||
; SI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<4 x s8>)
|
||||
; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
|
||||
; SI: G_STORE [[ANYEXT1]](s32), [[COPY]](p1) :: (store 1, align 4, addrspace 1)
|
||||
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
|
||||
; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
|
||||
; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
|
||||
; SI: G_STORE [[ANYEXT2]](s32), [[GEP]](p1) :: (store 1, addrspace 1)
|
||||
; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
|
||||
; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
|
||||
; SI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
|
||||
; SI: G_STORE [[ANYEXT3]](s32), [[GEP1]](p1) :: (store 1, align 2, addrspace 1)
|
||||
; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
|
||||
; SI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64)
|
||||
; SI: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
|
||||
; SI: G_STORE [[ANYEXT4]](s32), [[GEP2]](p1) :: (store 1, addrspace 1)
|
||||
; VI-LABEL: name: test_store_global_v3s8_align4
|
||||
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; VI: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF
|
||||
; VI: [[DEF1:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
|
||||
; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF1]], [[DEF]](<3 x s8>), 0
|
||||
; VI: G_STORE [[INSERT]](<4 x s8>), [[COPY]](p1) :: (store 3, align 4, addrspace 1)
|
||||
; VI: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[DEF1]](<4 x s8>)
|
||||
; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[DEF]](<3 x s8>), 0
|
||||
; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT]](<4 x s16>)
|
||||
; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<4 x s8>)
|
||||
; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
|
||||
; VI: G_STORE [[ANYEXT1]](s32), [[COPY]](p1) :: (store 1, align 4, addrspace 1)
|
||||
; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
|
||||
; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
|
||||
; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
|
||||
; VI: G_STORE [[ANYEXT2]](s32), [[GEP]](p1) :: (store 1, addrspace 1)
|
||||
; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
|
||||
; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64)
|
||||
; VI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
|
||||
; VI: G_STORE [[ANYEXT3]](s32), [[GEP1]](p1) :: (store 1, align 2, addrspace 1)
|
||||
; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
|
||||
; VI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64)
|
||||
; VI: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
|
||||
; VI: G_STORE [[ANYEXT4]](s32), [[GEP2]](p1) :: (store 1, addrspace 1)
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(<3 x s8>) = G_IMPLICIT_DEF
|
||||
G_STORE %1, %0 :: (store 3, addrspace 1, align 4)
|
||||
|
|
Loading…
Reference in New Issue