[NFC] Introduce an API for MemOp
Summary: This patch introduces an API for MemOp in order to simplify and tighten the client code.

Reviewers: courbet

Subscribers: arsenm, nemanjai, jvesely, nhaehnle, hiraditya, kbarton, jsji, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D73964
commit f85d3408e6 (parent 89ca4b9ef2)
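The effect on client code is easiest to see at a single call site. In findOptimalMemOpLowering (diffed below), a zero-sentinel comparison on raw unsigned alignments becomes a named predicate plus a typed comparison; both lines are taken verbatim from the hunks that follow:

    // Before: 0 is overloaded to mean "memset / constant source" on the
    // source side and "alignment may be changed" on the destination side.
    if (!(Op.getSrcAlign() == 0 || Op.getSrcAlign() >= Op.getDstAlign()))
      return false;

    // After: the intent is spelled out and the sentinel is gone.
    if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
      return false;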
@@ -154,15 +154,35 @@ public:
   }
 
   uint64_t size() const { return Size; }
-  uint64_t getDstAlign() const {
-    return DstAlignCanChange ? 0 : DstAlign.value();
+  Align getDstAlign() const {
+    assert(!DstAlignCanChange);
+    return DstAlign;
   }
+  bool isFixedDstAlign() const { return !DstAlignCanChange; }
   bool allowOverlap() const { return AllowOverlap; }
   bool isMemset() const { return IsMemset; }
   bool isMemcpy() const { return !IsMemset; }
-  bool isZeroMemset() const { return ZeroMemset; }
-  bool isMemcpyStrSrc() const { return MemcpyStrSrc; }
-  uint64_t getSrcAlign() const { return isMemset() ? 0 : SrcAlign.value(); }
+  bool isMemcpyWithFixedDstAlign() const {
+    return isMemcpy() && !DstAlignCanChange;
+  }
+  bool isZeroMemset() const { return isMemset() && ZeroMemset; }
+  bool isMemcpyStrSrc() const {
+    assert(isMemcpy() && "Must be a memcpy");
+    return MemcpyStrSrc;
+  }
+  Align getSrcAlign() const {
+    assert(isMemcpy() && "Must be a memcpy");
+    return SrcAlign;
+  }
+  bool isSrcAligned(Align AlignCheck) const {
+    return isMemset() || llvm::isAligned(AlignCheck, SrcAlign.value());
+  }
+  bool isDstAligned(Align AlignCheck) const {
+    return DstAlignCanChange || llvm::isAligned(AlignCheck, DstAlign.value());
+  }
+  bool isAligned(Align AlignCheck) const {
+    return isSrcAligned(AlignCheck) && isDstAligned(AlignCheck);
+  }
 };
 
 /// This base class for TargetLowering contains the SelectionDAG-independent
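Taken together, the new accessors encode two invariants: getDstAlign() may only be called when the destination alignment is fixed, and getSrcAlign()/isMemcpyStrSrc() only for a memcpy. The isSrcAligned/isDstAligned predicates fold the old zero-sentinel special cases ("a memset has no source", "a changeable destination can always be realigned") into the query itself. Below is a self-contained sketch of these semantics for illustration only; it is not the LLVM header, and llvm::Align is modeled by a plain power-of-two byte count:

    #include <cassert>
    #include <cstdint>

    struct MemOpModel {
      uint64_t Size;
      uint64_t DstAlign;     // meaningful only when !DstAlignCanChange
      uint64_t SrcAlign;     // meaningful only for memcpy
      bool DstAlignCanChange;
      bool IsMemset;
      bool ZeroMemset;       // only ever true for a memset
      bool MemcpyStrSrc;     // only ever true for a memcpy

      bool isMemset() const { return IsMemset; }
      bool isMemcpy() const { return !IsMemset; }
      bool isFixedDstAlign() const { return !DstAlignCanChange; }
      // Accessors now assert their preconditions instead of returning 0.
      uint64_t getDstAlign() const { assert(!DstAlignCanChange); return DstAlign; }
      uint64_t getSrcAlign() const { assert(isMemcpy()); return SrcAlign; }
      // A memset has no source, so any source-side check passes; a
      // changeable destination can be realigned, so it passes too.
      bool isSrcAligned(uint64_t A) const { return isMemset() || SrcAlign % A == 0; }
      bool isDstAligned(uint64_t A) const { return DstAlignCanChange || DstAlign % A == 0; }
      bool isAligned(uint64_t A) const { return isSrcAligned(A) && isDstAligned(A); }
    };

    int main() {
      // A memset to a destination whose alignment the lowering may raise:
      MemOpModel Set{/*Size=*/32, /*DstAlign=*/1, /*SrcAlign=*/0,
                     /*DstAlignCanChange=*/true, /*IsMemset=*/true,
                     /*ZeroMemset=*/true, /*MemcpyStrSrc=*/false};
      assert(Set.isAligned(16)); // passes: no source, dst can be realigned
      return 0;
    }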
@@ -860,7 +860,7 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
                                           unsigned DstAS, unsigned SrcAS,
                                           const AttributeList &FuncAttributes,
                                           const TargetLowering &TLI) {
-  if (Op.getSrcAlign() != 0 && Op.getSrcAlign() < Op.getDstAlign())
+  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
     return false;
 
   LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
@@ -870,16 +870,18 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
     // We only need to check DstAlign here as SrcAlign is always greater or
     // equal to DstAlign (or zero).
     Ty = LLT::scalar(64);
-    while (Op.getDstAlign() && Op.getDstAlign() < Ty.getSizeInBytes() &&
-           !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
-      Ty = LLT::scalar(Ty.getSizeInBytes());
+    if (Op.isFixedDstAlign())
+      while (Op.getDstAlign() < Ty.getSizeInBytes() &&
+             !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS,
+                                                 Op.getDstAlign().value()))
+        Ty = LLT::scalar(Ty.getSizeInBytes());
     assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
     // FIXME: check for the largest legal type we can load/store to.
   }
 
   unsigned NumMemOps = 0;
-  auto Size = Op.size();
-  while (Size != 0) {
+  uint64_t Size = Op.size();
+  while (Size) {
     unsigned TySize = Ty.getSizeInBytes();
     while (TySize > Size) {
       // For now, only use non-vector load / store's for the left-over pieces.
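Both lowering helpers share the same greedy shape: pick the widest profitable type, emit as many full-width operations as fit, then shrink for the tail. The following runnable model shows that decomposition in isolation; it is a simplification that halves the type size, where the real code walks LLVM's own type lists and also consults allowsMisalignedMemoryAccesses:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Cover Size bytes greedily with power-of-two store sizes <= MaxTySize.
    std::vector<unsigned> decompose(uint64_t Size, unsigned MaxTySize) {
      std::vector<unsigned> MemOps;
      unsigned TySize = MaxTySize;
      while (Size) {
        while (TySize > Size)
          TySize /= 2; // model only; LLVM steps through its type lists
        MemOps.push_back(TySize);
        Size -= TySize;
      }
      return MemOps;
    }

    int main() {
      for (unsigned S : decompose(23, 8)) // 23 bytes, up to 8-byte stores
        std::cout << S << ' ';            // prints: 8 8 4 2 1
      std::cout << '\n';
      return 0;
    }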
@@ -899,7 +901,8 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
       MVT VT = getMVTForLLT(Ty);
       if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
           TLI.allowsMisalignedMemoryAccesses(
-              VT, DstAS, Op.getDstAlign(), MachineMemOperand::MONone, &Fast) &&
+              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
+              MachineMemOperand::MONone, &Fast) &&
           Fast)
         TySize = Size;
       else {
@@ -179,14 +179,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
 bool TargetLowering::findOptimalMemOpLowering(
     std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
     unsigned SrcAS, const AttributeList &FuncAttributes) const {
-  // If 'SrcAlign' is zero, that means the memory operation does not need to
-  // load the value, i.e. memset or memcpy from constant string. Otherwise,
-  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
-  // is the specified alignment of the memory operation. If it is zero, that
-  // means it's possible to change the alignment of the destination.
-  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
-  // not need to be loaded.
-  if (!(Op.getSrcAlign() == 0 || Op.getSrcAlign() >= Op.getDstAlign()))
+  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
     return false;
 
   EVT VT = getOptimalMemOpType(Op, FuncAttributes);
@@ -196,9 +189,11 @@ bool TargetLowering::findOptimalMemOpLowering(
     // We only need to check DstAlign here as SrcAlign is always greater or
     // equal to DstAlign (or zero).
     VT = MVT::i64;
-    while (Op.getDstAlign() && Op.getDstAlign() < VT.getSizeInBits() / 8 &&
-           !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
-      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+    if (Op.isFixedDstAlign())
+      while (
+          Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
+          !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign().value()))
+        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
     assert(VT.isInteger());
 
     // Find the largest legal integer type.
@@ -214,8 +209,8 @@ bool TargetLowering::findOptimalMemOpLowering(
   }
 
   unsigned NumMemOps = 0;
-  auto Size = Op.size();
-  while (Size != 0) {
+  uint64_t Size = Op.size();
+  while (Size) {
     unsigned VTSize = VT.getSizeInBits() / 8;
     while (VTSize > Size) {
       // For now, only use non-vector load / store's for the left-over pieces.
@@ -250,8 +245,9 @@ bool TargetLowering::findOptimalMemOpLowering(
       // issuing a (or a pair of) unaligned and overlapping load / store.
       bool Fast;
       if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
-          allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign(),
-                                         MachineMemOperand::MONone, &Fast) &&
+          allowsMisalignedMemoryAccesses(
+              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
+              MachineMemOperand::MONone, &Fast) &&
           Fast)
         VTSize = Size;
       else {
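The Op.allowOverlap() branch above implements the trick the comment alludes to: when the tail would otherwise take several narrow accesses, one more full-width access that overlaps the previous one covers it. A standalone illustration for a 7-byte copy follows; this is a sketch only, and in the real code the decision additionally requires the target to report the misaligned access as fast:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Copy 7 bytes as two (possibly unaligned) 4-byte accesses, the second
    // shifted back to offset 3, instead of three accesses of 4, 2 and 1 bytes.
    void copy7(char *Dst, const char *Src) {
      uint32_t A, B;
      std::memcpy(&A, Src, 4);     // bytes 0..3
      std::memcpy(&B, Src + 3, 4); // bytes 3..6, overlaps byte 3
      std::memcpy(Dst, &A, 4);
      std::memcpy(Dst + 3, &B, 4); // rewrites byte 3 with the same value
    }

    int main() {
      const char Src[8] = "1234567";
      char Dst[8] = {};
      copy7(Dst, Src);
      assert(std::memcmp(Dst, Src, 7) == 0);
      return 0;
    }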
@@ -9413,11 +9413,6 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
   return true;
 }
 
-static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
-                       unsigned AlignCheck) {
-  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
-          (DstAlign == 0 || DstAlign % AlignCheck == 0));
-}
-
 EVT AArch64TargetLowering::getOptimalMemOpType(
     const MemOp &Op, const AttributeList &FuncAttributes) const {
@@ -9429,8 +9424,8 @@ EVT AArch64TargetLowering::getOptimalMemOpType(
   // taken one instruction to materialize the v2i64 zero and one store (with
   // restrictive addressing mode). Just do i64 stores.
   bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
-  auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
-    if (memOpAlign(Op.getSrcAlign(), Op.getDstAlign(), AlignCheck))
+  auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
+    if (Op.isAligned(AlignCheck))
       return true;
     bool Fast;
     return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,
@@ -9439,13 +9434,13 @@ EVT AArch64TargetLowering::getOptimalMemOpType(
   };
 
   if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
-      AlignmentIsAcceptable(MVT::v2i64, 16))
+      AlignmentIsAcceptable(MVT::v2i64, Align(16)))
     return MVT::v2i64;
-  if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
+  if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
     return MVT::f128;
-  if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
+  if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
     return MVT::i64;
-  if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
+  if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
     return MVT::i32;
   return MVT::Other;
 }
@@ -9460,8 +9455,8 @@ LLT AArch64TargetLowering::getOptimalMemOpLLT(
   // taken one instruction to materialize the v2i64 zero and one store (with
   // restrictive addressing mode). Just do i64 stores.
   bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
-  auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
-    if (memOpAlign(Op.getSrcAlign(), Op.getDstAlign(), AlignCheck))
+  auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
+    if (Op.isAligned(AlignCheck))
       return true;
     bool Fast;
     return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,
@@ -9470,13 +9465,13 @@ LLT AArch64TargetLowering::getOptimalMemOpLLT(
   };
 
   if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
-      AlignmentIsAcceptable(MVT::v2i64, 16))
+      AlignmentIsAcceptable(MVT::v2i64, Align(16)))
     return LLT::vector(2, 64);
-  if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
+  if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
     return LLT::scalar(128);
-  if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
+  if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
     return LLT::scalar(64);
-  if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
+  if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
     return LLT::scalar(32);
   return LLT();
 }
@@ -1299,10 +1299,10 @@ EVT SITargetLowering::getOptimalMemOpType(
   // use. Make sure we switch these to 64-bit accesses.
 
   if (Op.size() >= 16 &&
-      Op.getDstAlign() >= 4) // XXX: Should only do for global
+      Op.isDstAligned(Align(4))) // XXX: Should only do for global
     return MVT::v4i32;
 
-  if (Op.size() >= 8 && Op.getDstAlign() >= 4)
+  if (Op.size() >= 8 && Op.isDstAligned(Align(4)))
     return MVT::v2i32;
 
   // Use the default.
@@ -15018,26 +15018,21 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
   return false;
 }
 
-static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
-                       unsigned AlignCheck) {
-  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
-          (DstAlign == 0 || DstAlign % AlignCheck == 0));
-}
-
 EVT ARMTargetLowering::getOptimalMemOpType(
     const MemOp &Op, const AttributeList &FuncAttributes) const {
   // See if we can use NEON instructions for this...
-  if ((!Op.isMemset() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
+  if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
       !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
     bool Fast;
     if (Op.size() >= 16 &&
-        (memOpAlign(Op.getSrcAlign(), Op.getDstAlign(), 16) ||
+        (Op.isAligned(Align(16)) ||
          (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1,
                                          MachineMemOperand::MONone, &Fast) &&
           Fast))) {
       return MVT::v2f64;
     } else if (Op.size() >= 8 &&
-               (memOpAlign(Op.getSrcAlign(), Op.getDstAlign(), 8) ||
+               (Op.isAligned(Align(8)) ||
                 (allowsMisalignedMemoryAccesses(
                      MVT::f64, 0, 1, MachineMemOperand::MONone, &Fast) &&
                  Fast))) {
@@ -3380,21 +3380,12 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
 /// determined using generic target-independent logic.
 EVT HexagonTargetLowering::getOptimalMemOpType(
     const MemOp &Op, const AttributeList &FuncAttributes) const {
-
-  auto Aligned = [](unsigned GivenA, unsigned MinA) -> bool {
-    return (GivenA % MinA) == 0;
-  };
-
-  if (Op.size() >= 8 && Aligned(Op.getDstAlign(), 8) &&
-      (Op.isMemset() || Aligned(Op.getSrcAlign(), 8)))
+  if (Op.size() >= 8 && Op.isAligned(Align(8)))
     return MVT::i64;
-  if (Op.size() >= 4 && Aligned(Op.getDstAlign(), 4) &&
-      (Op.isMemset() || Aligned(Op.getSrcAlign(), 4)))
+  if (Op.size() >= 4 && Op.isAligned(Align(4)))
     return MVT::i32;
-  if (Op.size() >= 2 && Aligned(Op.getDstAlign(), 2) &&
-      (Op.isMemset() || Aligned(Op.getSrcAlign(), 2)))
+  if (Op.size() >= 2 && Op.isAligned(Align(2)))
     return MVT::i16;
 
   return MVT::Other;
 }
@@ -15120,9 +15120,7 @@ EVT PPCTargetLowering::getOptimalMemOpType(
   // When expanding a memset, require at least two QPX instructions to cover
   // the cost of loading the value to be stored from the constant pool.
   if (Subtarget.hasQPX() && Op.size() >= 32 &&
-      (!Op.isMemset() || Op.size() >= 64) &&
-      (!Op.getSrcAlign() || Op.getSrcAlign() >= 32) &&
-      (!Op.getDstAlign() || Op.getDstAlign() >= 32) &&
+      (Op.isMemcpy() || Op.size() >= 64) && Op.isAligned(Align(32)) &&
       !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
     return MVT::v4f64;
   }
@@ -15130,8 +15128,7 @@ EVT PPCTargetLowering::getOptimalMemOpType(
   // We should use Altivec/VSX loads and stores when available. For unaligned
   // addresses, unaligned VSX loads are only fast starting with the P8.
   if (Subtarget.hasAltivec() && Op.size() >= 16 &&
-      (((!Op.getSrcAlign() || Op.getSrcAlign() >= 16) &&
-        (!Op.getDstAlign() || Op.getDstAlign() >= 16)) ||
+      (Op.isAligned(Align(16)) ||
        ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
     return MVT::v4i32;
   }
@@ -2264,9 +2264,7 @@ EVT X86TargetLowering::getOptimalMemOpType(
     const MemOp &Op, const AttributeList &FuncAttributes) const {
   if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
     if (Op.size() >= 16 &&
-        (!Subtarget.isUnalignedMem16Slow() ||
-         ((Op.getDstAlign() == 0 || Op.getDstAlign() >= 16) &&
-          (Op.getSrcAlign() == 0 || Op.getSrcAlign() >= 16)))) {
+        (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
       // FIXME: Check if unaligned 64-byte accesses are slow.
       if (Op.size() >= 64 && Subtarget.hasAVX512() &&
           (Subtarget.getPreferVectorWidth() >= 512)) {
@@ -2289,7 +2287,7 @@ EVT X86TargetLowering::getOptimalMemOpType(
     if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
         (Subtarget.getPreferVectorWidth() >= 128))
      return MVT::v4f32;
-  } else if ((!Op.isMemset() || Op.isZeroMemset()) && !Op.isMemcpyStrSrc() &&
+  } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
              Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
     // Do not use f64 to lower memcpy if source is string constant. It's
     // better to use i32 to avoid the loads.
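The earlier ARM rewrite ((!Op.isMemset() || Op.isZeroMemset()) to (Op.isMemcpy() || Op.isZeroMemset())) is a pure Boolean identity once isZeroMemset() implies isMemset(). The X86 rewrite above is not: it matches the old condition only under the invariant that MemcpyStrSrc is never set for a memset, which the new asserting isMemcpyStrSrc() accessor enforces. A quick exhaustive check of that claim, as an illustration only:

    #include <cassert>

    int main() {
      for (int IsMemset = 0; IsMemset <= 1; ++IsMemset)
        for (int ZeroMemset = 0; ZeroMemset <= 1; ++ZeroMemset)
          for (int MemcpyStrSrc = 0; MemcpyStrSrc <= 1; ++MemcpyStrSrc) {
            if (IsMemset && MemcpyStrSrc)
              continue; // excluded by the invariant the API now asserts
            bool IsMemcpy = !IsMemset;
            // Old X86 condition, on the raw flags:
            bool OldCond = (!IsMemset || ZeroMemset) && !MemcpyStrSrc;
            // New condition; note isZeroMemset() is now IsMemset && ZeroMemset:
            bool NewCond =
                (IsMemcpy && !MemcpyStrSrc) || (IsMemset && ZeroMemset);
            assert(OldCond == NewCond);
          }
      return 0;
    }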