AMDGPU/GlobalISel: Implement LLT version of allowsMisalignedMemoryAccesses

Author: Matt Arsenault, 2020-07-31 11:04:13 -04:00
Commit: 6c7f640bf7 (parent 08c7d570d3)
3 changed files with 29 additions and 17 deletions

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

@@ -262,7 +262,7 @@ static bool isLoadStoreSizeLegal(const GCNSubtarget &ST,
   unsigned RegSize = Ty.getSizeInBits();
   unsigned MemSize = Query.MMODescrs[0].SizeInBits;
-  unsigned Align = Query.MMODescrs[0].AlignInBits;
+  unsigned AlignBits = Query.MMODescrs[0].AlignInBits;
   unsigned AS = Query.Types[1].getAddressSpace();
 
   // All of these need to be custom lowered to cast the pointer operand.
@@ -305,9 +305,10 @@ static bool isLoadStoreSizeLegal(const GCNSubtarget &ST,
   assert(RegSize >= MemSize);
 
-  if (Align < MemSize) {
+  if (AlignBits < MemSize) {
     const SITargetLowering *TLI = ST.getTargetLowering();
-    if (!TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8))
+    if (!TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS,
+                                                 Align(AlignBits / 8)))
       return false;
   }
@@ -954,10 +955,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
           // Split vector extloads.
           unsigned MemSize = Query.MMODescrs[0].SizeInBits;
-          unsigned Align = Query.MMODescrs[0].AlignInBits;
+          unsigned AlignBits = Query.MMODescrs[0].AlignInBits;
 
           if (MemSize < DstTy.getSizeInBits())
-            MemSize = std::max(MemSize, Align);
+            MemSize = std::max(MemSize, AlignBits);
 
           if (DstTy.isVector() && DstTy.getSizeInBits() > MemSize)
             return true;
@@ -979,9 +980,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
             return true;
           }
 
-          if (Align < MemSize) {
+          if (AlignBits < MemSize) {
             const SITargetLowering *TLI = ST.getTargetLowering();
-            return !TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8);
+            return !TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS,
+                                                            Align(AlignBits / 8));
          }
 
          return false;
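
Why the locals were renamed from Align to AlignBits: this code now constructs an llvm::Align value in the same scope, and a local variable named Align would shadow the type name, breaking the unqualified Align(AlignBits / 8) expression. A minimal sketch of the clash (hypothetical, not part of the commit):

  unsigned Align = 128;          // old name: shadows the llvm::Align type
  // Align(Align / 8);           // ill-formed: tries to call an unsigned
  unsigned AlignBits = 128;      // renamed local, as in the commit
  llvm::Align A(AlignBits / 8);  // the type name is unambiguous again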

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

@@ -1384,7 +1384,7 @@ bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
 }
 
 bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
-    unsigned Size, unsigned AddrSpace, unsigned Align,
+    unsigned Size, unsigned AddrSpace, Align Alignment,
     MachineMemOperand::Flags Flags, bool *IsFast) const {
   if (IsFast)
     *IsFast = false;
@@ -1394,7 +1394,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
     // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
     // aligned, 8 byte access in a single operation using ds_read2/write2_b32
     // with adjacent offsets.
-    bool AlignedBy4 = (Align % 4 == 0);
+    bool AlignedBy4 = Alignment >= Align(4);
     if (IsFast)
       *IsFast = AlignedBy4;
@@ -1407,7 +1407,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
   if (!Subtarget->hasUnalignedScratchAccess() &&
       (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
        AddrSpace == AMDGPUAS::FLAT_ADDRESS)) {
-    bool AlignedBy4 = Align >= 4;
+    bool AlignedBy4 = Alignment >= Align(4);
     if (IsFast)
       *IsFast = AlignedBy4;
@@ -1422,7 +1422,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
       // 2-byte alignment is worse than 1 unless doing a 2-byte access.
       *IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
                  AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ?
-        Align >= 4 : Align != 2;
+        Alignment >= Align(4) : Alignment != Align(2);
     }
 
     return true;
@@ -1438,12 +1438,12 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
   if (IsFast)
     *IsFast = true;
 
-  return Size >= 32 && Align >= 4;
+  return Size >= 32 && Alignment >= Align(4);
 }
 
 bool SITargetLowering::allowsMisalignedMemoryAccesses(
-    EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
-    bool *IsFast) const {
+    EVT VT, unsigned AddrSpace, unsigned Alignment,
+    MachineMemOperand::Flags Flags, bool *IsFast) const {
   if (IsFast)
     *IsFast = false;
@@ -1457,7 +1457,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
   }
 
   return allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace,
-                                            Align, Flags, IsFast);
+                                            Align(Alignment), Flags, IsFast);
 }
 
 EVT SITargetLowering::getOptimalMemOpType(
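
A detail worth noting about the rewrites above: the old checks treated alignment as a raw byte count (Align % 4 == 0, Align >= 4, Align != 2), while the new ones compare llvm::Align values. Since llvm::Align can only hold powers of two, "is a multiple of 4" and "is at least 4" coincide for every representable value, so the rewrite preserves behavior. A standalone sanity check, assuming only LLVM's Support/Alignment.h:

  #include "llvm/Support/Alignment.h"
  #include <cassert>

  int main() {
    // llvm::Align asserts its value is a power of two, so for every value
    // it can hold, "multiple of 4" and "at least 4" agree.
    for (unsigned Bytes : {1u, 2u, 4u, 8u, 16u, 32u}) {
      llvm::Align A(Bytes);
      assert((A >= llvm::Align(4)) == (Bytes % 4 == 0));
    }
    return 0;
  }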

llvm/lib/Target/AMDGPU/SIISelLowering.h

@@ -260,12 +260,22 @@ public:
                           const SelectionDAG &DAG) const override;
 
   bool allowsMisalignedMemoryAccessesImpl(
-      unsigned Size, unsigned AS, unsigned Align,
+      unsigned Size, unsigned AddrSpace, Align Alignment,
       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
       bool *IsFast = nullptr) const;
 
   bool allowsMisalignedMemoryAccesses(
-      EVT VT, unsigned AS, unsigned Align,
+      LLT Ty, unsigned AddrSpace, Align Alignment,
+      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+      bool *IsFast = nullptr) const override {
+    if (IsFast)
+      *IsFast = false;
+    return allowsMisalignedMemoryAccessesImpl(Ty.getSizeInBits(), AddrSpace,
+                                              Alignment, Flags, IsFast);
+  }
+
+  bool allowsMisalignedMemoryAccesses(
+      EVT VT, unsigned AS, unsigned Alignment,
       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
       bool *IsFast = nullptr) const override;
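
Finally, a sketch of what the new inline override enables on the GlobalISel side: callers can pass an LLT directly instead of building an EVT. The call site below is hypothetical (not from the commit) and assumes TLI, MemSize, AS, and AlignBits are in scope as in the legalizer code above:

  // Query the target through the LLT overload.
  LLT MemTy = LLT::scalar(MemSize);   // describe the access as an LLT
  bool Fast = false;
  if (!TLI->allowsMisalignedMemoryAccesses(MemTy, AS, Align(AlignBits / 8),
                                           MachineMemOperand::MONone, &Fast))
    return false; // the target rejects this under-aligned access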