AMDGPU/GlobalISel: Implement LLT version of allowsMisalignedMemoryAccesses
commit 6c7f640bf7
parent 08c7d570d3
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

@@ -262,7 +262,7 @@ static bool isLoadStoreSizeLegal(const GCNSubtarget &ST,
   unsigned RegSize = Ty.getSizeInBits();
   unsigned MemSize = Query.MMODescrs[0].SizeInBits;
-  unsigned Align = Query.MMODescrs[0].AlignInBits;
+  unsigned AlignBits = Query.MMODescrs[0].AlignInBits;
   unsigned AS = Query.Types[1].getAddressSpace();
 
   // All of these need to be custom lowered to cast the pointer operand.
@@ -305,9 +305,10 @@ static bool isLoadStoreSizeLegal(const GCNSubtarget &ST,
   assert(RegSize >= MemSize);
 
-  if (Align < MemSize) {
+  if (AlignBits < MemSize) {
     const SITargetLowering *TLI = ST.getTargetLowering();
-    if (!TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8))
+    if (!TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS,
+                                                 Align(AlignBits / 8)))
       return false;
   }
 
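The shape of this check is worth spelling out: the legality query reports alignment in bits, and the byte-based llvm::Align is constructed only at the boundary to the target hook. Below is a standalone C++ sketch of the same control flow, with a hypothetical allowsMisaligned hook and a minimal stand-in for llvm::Align (which, like the real class, asserts its value is a power of two):

#include <cassert>
#include <cstdio>

// Minimal stand-in for llvm::Align: a byte alignment, power of two by
// construction.
struct ByteAlign {
  unsigned Value;
  explicit ByteAlign(unsigned V) : Value(V) { assert(V && !(V & (V - 1))); }
};

// Hypothetical stand-in for the target hook consulted when an access is
// under-aligned (the real one is allowsMisalignedMemoryAccessesImpl).
static bool allowsMisaligned(unsigned MemSizeBits, ByteAlign Alignment) {
  (void)MemSizeBits;
  return Alignment.Value >= 4; // e.g. LDS b64 handled via ds_read2_b32
}

// Mirrors the legalizer check: only consult the hook when the known
// alignment (in bits) does not cover the access size.
static bool isAlignmentLegal(unsigned MemSizeBits, unsigned AlignBits) {
  if (AlignBits < MemSizeBits)
    return allowsMisaligned(MemSizeBits, ByteAlign(AlignBits / 8));
  return true;
}

int main() {
  std::printf("%d\n", isAlignmentLegal(64, 32)); // 1: 4-byte aligned b64
  std::printf("%d\n", isAlignmentLegal(64, 16)); // 0: 2-byte aligned b64
}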
@@ -954,10 +955,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
         // Split vector extloads.
         unsigned MemSize = Query.MMODescrs[0].SizeInBits;
-        unsigned Align = Query.MMODescrs[0].AlignInBits;
+        unsigned AlignBits = Query.MMODescrs[0].AlignInBits;
 
         if (MemSize < DstTy.getSizeInBits())
-          MemSize = std::max(MemSize, Align);
+          MemSize = std::max(MemSize, AlignBits);
 
         if (DstTy.isVector() && DstTy.getSizeInBits() > MemSize)
           return true;
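The std::max line is the subtle part of this predicate: an extending load reads fewer bits than it defines, and rounding the memory size up to the known alignment decides whether the vector extload is treated as needing a split. A standalone sketch of just that arithmetic, with made-up sizes (the real decision also depends on the surrounding legalization rules):

#include <algorithm>
#include <cstdio>

// Mirrors the arithmetic of the predicate above: sizes in bits, returns
// true when the vector extload is considered wider than what memory (or
// its alignment) provides.
static bool shouldSplitVectorExtload(unsigned DstSizeBits,
                                     unsigned MemSizeBits,
                                     unsigned AlignBits) {
  if (MemSizeBits < DstSizeBits) // extending load
    MemSizeBits = std::max(MemSizeBits, AlignBits);
  return DstSizeBits > MemSizeBits;
}

int main() {
  // 128-bit vector result extended from 32 bits in memory:
  std::printf("%d\n", shouldSplitVectorExtload(128, 32, 32));  // 1: split
  std::printf("%d\n", shouldSplitVectorExtload(128, 32, 128)); // 0: whole
}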
@@ -979,9 +980,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
           return true;
         }
 
-        if (Align < MemSize) {
+        if (AlignBits < MemSize) {
           const SITargetLowering *TLI = ST.getTargetLowering();
-          return !TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8);
+          return !TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS,
+                                                          Align(AlignBits / 8));
         }
 
         return false;
llvm/lib/Target/AMDGPU/SIISelLowering.cpp

@@ -1384,7 +1384,7 @@ bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
 }
 
 bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
-    unsigned Size, unsigned AddrSpace, unsigned Align,
+    unsigned Size, unsigned AddrSpace, Align Alignment,
     MachineMemOperand::Flags Flags, bool *IsFast) const {
   if (IsFast)
     *IsFast = false;
@@ -1394,7 +1394,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
     // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
     // aligned, 8 byte access in a single operation using ds_read2/write2_b32
     // with adjacent offsets.
-    bool AlignedBy4 = (Align % 4 == 0);
+    bool AlignedBy4 = Alignment >= Align(4);
     if (IsFast)
       *IsFast = AlignedBy4;
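The comment in this hunk reflects a hardware detail worth an illustration: ds_read_b64/ds_write_b64 want 8-byte alignment, but a 4-byte-aligned 8-byte LDS access can still be one instruction because ds_read2_b32/ds_write2_b32 carry two offset fields encoded in dword units. A sketch of that offset math, with hypothetical names (the actual selection happens in the backend's DS load/store lowering):

#include <cstdio>

// Hypothetical helper: the two dword slots a ds_read2_b32 would use for an
// 8-byte, 4-byte-aligned LDS access. offset0/offset1 are in dword units and
// adjacent for a contiguous 64-bit access.
struct Read2Offsets {
  unsigned Offset0;
  unsigned Offset1;
};

static Read2Offsets read2ForB64(unsigned ByteOffset) {
  unsigned Dword = ByteOffset / 4; // requires 4-byte alignment
  return {Dword, Dword + 1};
}

int main() {
  Read2Offsets O = read2ForB64(12); // b64 access at byte offset 12
  std::printf("ds_read2_b32 offset0:%u offset1:%u\n", O.Offset0, O.Offset1);
}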
@@ -1407,7 +1407,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
   if (!Subtarget->hasUnalignedScratchAccess() &&
       (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
        AddrSpace == AMDGPUAS::FLAT_ADDRESS)) {
-    bool AlignedBy4 = Align >= 4;
+    bool AlignedBy4 = Alignment >= Align(4);
     if (IsFast)
       *IsFast = AlignedBy4;
@@ -1422,7 +1422,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
       // 2-byte alignment is worse than 1 unless doing a 2-byte access.
       *IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
                  AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ?
-        Align >= 4 : Align != 2;
+        Alignment >= Align(4) : Alignment != Align(2);
     }
 
     return true;
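All of the mechanical rewrites in this file lean on llvm::Align holding a power of two: for such values, Alignment >= Align(4) is equivalent to the old Align % 4 == 0, and Alignment != Align(2) to Align != 2. A standalone check of that equivalence over the alignments that can occur:

#include <cassert>
#include <cstdio>

int main() {
  // For a power-of-two alignment A, (A % 4 == 0) <=> (A >= 4); inequality
  // against 2 is likewise a plain value comparison.
  for (unsigned A = 1; A <= 64; A *= 2) {
    assert(((A % 4) == 0) == (A >= 4));
    std::printf("align %2u: >=4 -> %d, !=2 -> %d\n", A, A >= 4, A != 2);
  }
}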
@@ -1438,12 +1438,12 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
   if (IsFast)
     *IsFast = true;
 
-  return Size >= 32 && Align >= 4;
+  return Size >= 32 && Alignment >= Align(4);
 }
 
 bool SITargetLowering::allowsMisalignedMemoryAccesses(
-    EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
-    bool *IsFast) const {
+    EVT VT, unsigned AddrSpace, unsigned Alignment,
+    MachineMemOperand::Flags Flags, bool *IsFast) const {
   if (IsFast)
     *IsFast = false;
@@ -1457,7 +1457,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
   }
 
   return allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace,
-                                            Align, Flags, IsFast);
+                                            Align(Alignment), Flags, IsFast);
 }
 
 EVT SITargetLowering::getOptimalMemOpType(
llvm/lib/Target/AMDGPU/SIISelLowering.h

@@ -260,12 +260,22 @@ public:
                         const SelectionDAG &DAG) const override;
 
   bool allowsMisalignedMemoryAccessesImpl(
-      unsigned Size, unsigned AS, unsigned Align,
+      unsigned Size, unsigned AddrSpace, Align Alignment,
       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
       bool *IsFast = nullptr) const;
 
   bool allowsMisalignedMemoryAccesses(
-      EVT VT, unsigned AS, unsigned Align,
+      LLT Ty, unsigned AddrSpace, Align Alignment,
+      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+      bool *IsFast = nullptr) const override {
+    if (IsFast)
+      *IsFast = false;
+    return allowsMisalignedMemoryAccessesImpl(Ty.getSizeInBits(), AddrSpace,
+                                              Alignment, Flags, IsFast);
+  }
+
+  bool allowsMisalignedMemoryAccesses(
+      EVT VT, unsigned AS, unsigned Alignment,
       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
       bool *IsFast = nullptr) const override;
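With the new inline overload in place, GlobalISel code can query the target directly with an LLT and a byte llvm::Align instead of round-tripping through EVT. A hypothetical in-tree call site (sketch only, assuming the usual AMDGPU backend includes; not part of this commit):

// Hypothetical caller inside the AMDGPU backend: ask whether a 4-byte
// aligned 64-bit flat access is both allowed and fast.
bool isFastUnalignedB64(const SITargetLowering &TLI) {
  bool Fast = false;
  bool Allowed = TLI.allowsMisalignedMemoryAccesses(
      LLT::scalar(64), AMDGPUAS::FLAT_ADDRESS, Align(4),
      MachineMemOperand::MONone, &Fast);
  return Allowed && Fast;
}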