forked from OSchip/llvm-project
[NFC] Introduce a type to model memory operation
Summary: This is a first step before changing the types to llvm::Align and introducing functions to ease client code. Reviewers: courbet Subscribers: arsenm, sdardis, nemanjai, jvesely, nhaehnle, hiraditya, kbarton, jrtc27, atanasyan, jsji, kerbowa, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D73785
This commit is contained in:
parent
edc3f4f02e
commit
3c89b75f23
|
@ -106,6 +106,49 @@ namespace Sched {
|
||||||
|
|
||||||
} // end namespace Sched
|
} // end namespace Sched
|
||||||
|
|
||||||
|
// MemOp models a memory operation, either memset or memcpy/memmove.
|
||||||
|
struct MemOp {
|
||||||
|
// Shared
|
||||||
|
uint64_t Size;
|
||||||
|
unsigned DstAlign; // Specified alignment of the memory operation or zero if
|
||||||
|
// destination alignment can satisfy any constraint.
|
||||||
|
bool AllowOverlap;
|
||||||
|
// memset only
|
||||||
|
bool IsMemset; // If set, this memory operation is a memset.
|
||||||
|
bool ZeroMemset; // If set clears out memory with zeros.
|
||||||
|
// memcpy only
|
||||||
|
bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register
|
||||||
|
// constant so it does not need to be loaded.
|
||||||
|
unsigned SrcAlign; // Inferred alignment of the source or zero if the memory
|
||||||
|
// operation does not need to load the value.
|
||||||
|
|
||||||
|
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, unsigned DstAlign,
|
||||||
|
unsigned SrcAlign, bool IsVolatile,
|
||||||
|
bool MemcpyStrSrc = false) {
|
||||||
|
return {
|
||||||
|
/*.Size =*/Size,
|
||||||
|
/*.DstAlign =*/DstAlignCanChange ? 0 : DstAlign,
|
||||||
|
/*.AllowOverlap =*/!IsVolatile,
|
||||||
|
/*.IsMemset =*/false,
|
||||||
|
/*.ZeroMemset =*/false,
|
||||||
|
/*.MemcpyStrSrc =*/MemcpyStrSrc,
|
||||||
|
/*.SrcAlign =*/SrcAlign,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
static MemOp Set(uint64_t Size, bool DstAlignCanChange, unsigned DstAlign,
|
||||||
|
bool IsZeroMemset, bool IsVolatile) {
|
||||||
|
return {
|
||||||
|
/*.Size =*/Size,
|
||||||
|
/*.DstAlign =*/DstAlignCanChange ? 0 : DstAlign,
|
||||||
|
/*.AllowOverlap =*/!IsVolatile,
|
||||||
|
/*.IsMemset =*/true,
|
||||||
|
/*.ZeroMemset =*/IsZeroMemset,
|
||||||
|
/*.MemcpyStrSrc =*/false,
|
||||||
|
/*.SrcAlign =*/0,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/// This base class for TargetLowering contains the SelectionDAG-independent
|
/// This base class for TargetLowering contains the SelectionDAG-independent
|
||||||
/// parts that can be used from the rest of CodeGen.
|
/// parts that can be used from the rest of CodeGen.
|
||||||
class TargetLoweringBase {
|
class TargetLoweringBase {
|
||||||
|
@ -1518,29 +1561,17 @@ public:
|
||||||
|
|
||||||
/// Returns the target specific optimal type for load and store operations as
|
/// Returns the target specific optimal type for load and store operations as
|
||||||
/// a result of memset, memcpy, and memmove lowering.
|
/// a result of memset, memcpy, and memmove lowering.
|
||||||
///
|
/// It returns EVT::Other if the type should be determined using generic
|
||||||
/// If DstAlign is zero that means it's safe to destination alignment can
|
/// target-independent logic.
|
||||||
/// satisfy any constraint. Similarly if SrcAlign is zero it means there isn't
|
|
||||||
/// a need to check it against alignment requirement, probably because the
|
|
||||||
/// source does not need to be loaded. If 'IsMemset' is true, that means it's
|
|
||||||
/// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
|
|
||||||
/// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
|
|
||||||
/// does not need to be loaded. It returns EVT::Other if the type should be
|
|
||||||
/// determined using generic target-independent logic.
|
|
||||||
virtual EVT
|
virtual EVT
|
||||||
getOptimalMemOpType(uint64_t /*Size*/, unsigned /*DstAlign*/,
|
getOptimalMemOpType(const MemOp &Op,
|
||||||
unsigned /*SrcAlign*/, bool /*IsMemset*/,
|
|
||||||
bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/,
|
|
||||||
const AttributeList & /*FuncAttributes*/) const {
|
const AttributeList & /*FuncAttributes*/) const {
|
||||||
return MVT::Other;
|
return MVT::Other;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// LLT returning variant.
|
/// LLT returning variant.
|
||||||
virtual LLT
|
virtual LLT
|
||||||
getOptimalMemOpLLT(uint64_t /*Size*/, unsigned /*DstAlign*/,
|
getOptimalMemOpLLT(const MemOp &Op,
|
||||||
unsigned /*SrcAlign*/, bool /*IsMemset*/,
|
|
||||||
bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/,
|
|
||||||
const AttributeList & /*FuncAttributes*/) const {
|
const AttributeList & /*FuncAttributes*/) const {
|
||||||
return LLT();
|
return LLT();
|
||||||
}
|
}
|
||||||
|
@ -3102,14 +3133,8 @@ public:
|
||||||
/// Return true if the number of memory ops is below the threshold (Limit).
|
/// Return true if the number of memory ops is below the threshold (Limit).
|
||||||
/// It returns the types of the sequence of memory ops to perform
|
/// It returns the types of the sequence of memory ops to perform
|
||||||
/// memset / memcpy by reference.
|
/// memset / memcpy by reference.
|
||||||
bool findOptimalMemOpLowering(std::vector<EVT> &MemOps,
|
bool findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
|
||||||
unsigned Limit, uint64_t Size,
|
const MemOp &Op, unsigned DstAS, unsigned SrcAS,
|
||||||
unsigned DstAlign, unsigned SrcAlign,
|
|
||||||
bool IsMemset,
|
|
||||||
bool ZeroMemset,
|
|
||||||
bool MemcpyStrSrc,
|
|
||||||
bool AllowOverlap,
|
|
||||||
unsigned DstAS, unsigned SrcAS,
|
|
||||||
const AttributeList &FuncAttributes) const;
|
const AttributeList &FuncAttributes) const;
|
||||||
|
|
||||||
/// Check to see if the specified operand of the specified instruction is a
|
/// Check to see if the specified operand of the specified instruction is a
|
||||||
|
|
|
@ -855,37 +855,30 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
|
||||||
|
|
||||||
// Returns a list of types to use for memory op lowering in MemOps. A partial
|
// Returns a list of types to use for memory op lowering in MemOps. A partial
|
||||||
// port of findOptimalMemOpLowering in TargetLowering.
|
// port of findOptimalMemOpLowering in TargetLowering.
|
||||||
static bool findGISelOptimalMemOpLowering(
|
static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
|
||||||
std::vector<LLT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign,
|
unsigned Limit, const MemOp &Op,
|
||||||
unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
|
unsigned DstAS, unsigned SrcAS,
|
||||||
bool AllowOverlap, unsigned DstAS, unsigned SrcAS,
|
const AttributeList &FuncAttributes,
|
||||||
const AttributeList &FuncAttributes, const TargetLowering &TLI) {
|
const TargetLowering &TLI) {
|
||||||
// If 'SrcAlign' is zero, that means the memory operation does not need to
|
if (Op.SrcAlign != 0 && Op.SrcAlign < Op.DstAlign)
|
||||||
// load the value, i.e. memset or memcpy from constant string. Otherwise,
|
|
||||||
// it's the inferred alignment of the source. 'DstAlign', on the other hand,
|
|
||||||
// is the specified alignment of the memory operation. If it is zero, that
|
|
||||||
// means it's possible to change the alignment of the destination.
|
|
||||||
// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
|
|
||||||
// not need to be loaded.
|
|
||||||
if (SrcAlign != 0 && SrcAlign < DstAlign)
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
LLT Ty = TLI.getOptimalMemOpLLT(Size, DstAlign, SrcAlign, IsMemset,
|
LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
|
||||||
ZeroMemset, MemcpyStrSrc, FuncAttributes);
|
|
||||||
|
|
||||||
if (Ty == LLT()) {
|
if (Ty == LLT()) {
|
||||||
// Use the largest scalar type whose alignment constraints are satisfied.
|
// Use the largest scalar type whose alignment constraints are satisfied.
|
||||||
// We only need to check DstAlign here as SrcAlign is always greater or
|
// We only need to check DstAlign here as SrcAlign is always greater or
|
||||||
// equal to DstAlign (or zero).
|
// equal to DstAlign (or zero).
|
||||||
Ty = LLT::scalar(64);
|
Ty = LLT::scalar(64);
|
||||||
while (DstAlign && DstAlign < Ty.getSizeInBytes() &&
|
while (Op.DstAlign && Op.DstAlign < Ty.getSizeInBytes() &&
|
||||||
!TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, DstAlign))
|
!TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.DstAlign))
|
||||||
Ty = LLT::scalar(Ty.getSizeInBytes());
|
Ty = LLT::scalar(Ty.getSizeInBytes());
|
||||||
assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
|
assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
|
||||||
// FIXME: check for the largest legal type we can load/store to.
|
// FIXME: check for the largest legal type we can load/store to.
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned NumMemOps = 0;
|
unsigned NumMemOps = 0;
|
||||||
|
auto Size = Op.Size;
|
||||||
while (Size != 0) {
|
while (Size != 0) {
|
||||||
unsigned TySize = Ty.getSizeInBytes();
|
unsigned TySize = Ty.getSizeInBytes();
|
||||||
while (TySize > Size) {
|
while (TySize > Size) {
|
||||||
|
@ -904,9 +897,9 @@ static bool findGISelOptimalMemOpLowering(
|
||||||
bool Fast;
|
bool Fast;
|
||||||
// Need to get a VT equivalent for allowsMisalignedMemoryAccesses().
|
// Need to get a VT equivalent for allowsMisalignedMemoryAccesses().
|
||||||
MVT VT = getMVTForLLT(Ty);
|
MVT VT = getMVTForLLT(Ty);
|
||||||
if (NumMemOps && AllowOverlap && NewTySize < Size &&
|
if (NumMemOps && Op.AllowOverlap && NewTySize < Size &&
|
||||||
TLI.allowsMisalignedMemoryAccesses(
|
TLI.allowsMisalignedMemoryAccesses(
|
||||||
VT, DstAS, DstAlign, MachineMemOperand::MONone, &Fast) &&
|
VT, DstAS, Op.DstAlign, MachineMemOperand::MONone, &Fast) &&
|
||||||
Fast)
|
Fast)
|
||||||
TySize = Size;
|
TySize = Size;
|
||||||
else {
|
else {
|
||||||
|
@ -988,11 +981,12 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val
|
||||||
auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
|
auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
|
||||||
bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
|
bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
|
||||||
|
|
||||||
if (!findGISelOptimalMemOpLowering(
|
if (!findGISelOptimalMemOpLowering(MemOps, Limit,
|
||||||
MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Align), 0,
|
MemOp::Set(KnownLen, DstAlignCanChange,
|
||||||
/*IsMemset=*/true,
|
Align,
|
||||||
/*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
|
/*IsZeroMemset=*/IsZeroVal,
|
||||||
/*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), ~0u,
|
/*IsVolatile=*/IsVolatile),
|
||||||
|
DstPtrInfo.getAddrSpace(), ~0u,
|
||||||
MF.getFunction().getAttributes(), TLI))
|
MF.getFunction().getAttributes(), TLI))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -1107,12 +1101,11 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
|
||||||
MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
|
MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
|
||||||
|
|
||||||
if (!findGISelOptimalMemOpLowering(
|
if (!findGISelOptimalMemOpLowering(
|
||||||
MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
|
MemOps, Limit,
|
||||||
SrcAlign,
|
MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
|
||||||
/*IsMemset=*/false,
|
IsVolatile),
|
||||||
/*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
|
DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
|
||||||
/*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(),
|
MF.getFunction().getAttributes(), TLI))
|
||||||
SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (DstAlignCanChange) {
|
if (DstAlignCanChange) {
|
||||||
|
@ -1214,12 +1207,11 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
|
||||||
// to a bug in its findOptimalMemOpLowering implementation. For now do the
|
// to a bug in its findOptimalMemOpLowering implementation. For now do the
|
||||||
// same thing here.
|
// same thing here.
|
||||||
if (!findGISelOptimalMemOpLowering(
|
if (!findGISelOptimalMemOpLowering(
|
||||||
MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
|
MemOps, Limit,
|
||||||
SrcAlign,
|
MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
|
||||||
/*IsMemset=*/false,
|
/*IsVolatile*/ true),
|
||||||
/*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
|
DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
|
||||||
/*AllowOverlap=*/false, DstPtrInfo.getAddrSpace(),
|
MF.getFunction().getAttributes(), TLI))
|
||||||
SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (DstAlignCanChange) {
|
if (DstAlignCanChange) {
|
||||||
|
|
|
@ -5908,13 +5908,12 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
|
||||||
bool CopyFromConstant = isMemSrcFromConstant(Src, Slice);
|
bool CopyFromConstant = isMemSrcFromConstant(Src, Slice);
|
||||||
bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
|
bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
|
||||||
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
|
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
|
||||||
|
|
||||||
if (!TLI.findOptimalMemOpLowering(
|
if (!TLI.findOptimalMemOpLowering(
|
||||||
MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment),
|
MemOps, Limit,
|
||||||
(isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false,
|
MemOp::Copy(Size, DstAlignCanChange, Alignment,
|
||||||
/*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant,
|
isZeroConstant ? 0 : SrcAlign, isVol, CopyFromConstant),
|
||||||
/*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(),
|
DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
|
||||||
SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes()))
|
MF.getFunction().getAttributes()))
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
if (DstAlignCanChange) {
|
if (DstAlignCanChange) {
|
||||||
|
@ -6088,14 +6087,11 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
|
||||||
if (Align > SrcAlign)
|
if (Align > SrcAlign)
|
||||||
SrcAlign = Align;
|
SrcAlign = Align;
|
||||||
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
|
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
|
||||||
// FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in
|
|
||||||
// findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the
|
|
||||||
// correct code.
|
|
||||||
bool AllowOverlap = false;
|
|
||||||
if (!TLI.findOptimalMemOpLowering(
|
if (!TLI.findOptimalMemOpLowering(
|
||||||
MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), SrcAlign,
|
MemOps, Limit,
|
||||||
/*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
|
MemOp::Copy(Size, DstAlignCanChange, Align, SrcAlign,
|
||||||
AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
|
/*IsVolatile*/ true),
|
||||||
|
DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
|
||||||
MF.getFunction().getAttributes()))
|
MF.getFunction().getAttributes()))
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
|
@ -6193,11 +6189,9 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
|
||||||
bool IsZeroVal =
|
bool IsZeroVal =
|
||||||
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
|
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
|
||||||
if (!TLI.findOptimalMemOpLowering(
|
if (!TLI.findOptimalMemOpLowering(
|
||||||
MemOps, TLI.getMaxStoresPerMemset(OptSize), Size,
|
MemOps, TLI.getMaxStoresPerMemset(OptSize),
|
||||||
(DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true,
|
MemOp::Set(Size, DstAlignCanChange, Align, IsZeroVal, isVol),
|
||||||
/*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
|
DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes()))
|
||||||
/*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u,
|
|
||||||
MF.getFunction().getAttributes()))
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
if (DstAlignCanChange) {
|
if (DstAlignCanChange) {
|
||||||
|
|
|
@ -176,16 +176,9 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
|
||||||
return LowerCallTo(CLI);
|
return LowerCallTo(CLI);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool TargetLowering::findOptimalMemOpLowering(
|
||||||
TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
|
std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
|
||||||
unsigned Limit, uint64_t Size,
|
unsigned SrcAS, const AttributeList &FuncAttributes) const {
|
||||||
unsigned DstAlign, unsigned SrcAlign,
|
|
||||||
bool IsMemset,
|
|
||||||
bool ZeroMemset,
|
|
||||||
bool MemcpyStrSrc,
|
|
||||||
bool AllowOverlap,
|
|
||||||
unsigned DstAS, unsigned SrcAS,
|
|
||||||
const AttributeList &FuncAttributes) const {
|
|
||||||
// If 'SrcAlign' is zero, that means the memory operation does not need to
|
// If 'SrcAlign' is zero, that means the memory operation does not need to
|
||||||
// load the value, i.e. memset or memcpy from constant string. Otherwise,
|
// load the value, i.e. memset or memcpy from constant string. Otherwise,
|
||||||
// it's the inferred alignment of the source. 'DstAlign', on the other hand,
|
// it's the inferred alignment of the source. 'DstAlign', on the other hand,
|
||||||
|
@ -193,20 +186,18 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
|
||||||
// means it's possible to change the alignment of the destination.
|
// means it's possible to change the alignment of the destination.
|
||||||
// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
|
// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
|
||||||
// not need to be loaded.
|
// not need to be loaded.
|
||||||
if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
|
if (!(Op.SrcAlign == 0 || Op.SrcAlign >= Op.DstAlign))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
|
EVT VT = getOptimalMemOpType(Op, FuncAttributes);
|
||||||
IsMemset, ZeroMemset, MemcpyStrSrc,
|
|
||||||
FuncAttributes);
|
|
||||||
|
|
||||||
if (VT == MVT::Other) {
|
if (VT == MVT::Other) {
|
||||||
// Use the largest integer type whose alignment constraints are satisfied.
|
// Use the largest integer type whose alignment constraints are satisfied.
|
||||||
// We only need to check DstAlign here as SrcAlign is always greater or
|
// We only need to check DstAlign here as SrcAlign is always greater or
|
||||||
// equal to DstAlign (or zero).
|
// equal to DstAlign (or zero).
|
||||||
VT = MVT::i64;
|
VT = MVT::i64;
|
||||||
while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
|
while (Op.DstAlign && Op.DstAlign < VT.getSizeInBits() / 8 &&
|
||||||
!allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
|
!allowsMisalignedMemoryAccesses(VT, DstAS, Op.DstAlign))
|
||||||
VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
|
VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
|
||||||
assert(VT.isInteger());
|
assert(VT.isInteger());
|
||||||
|
|
||||||
|
@ -223,6 +214,7 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned NumMemOps = 0;
|
unsigned NumMemOps = 0;
|
||||||
|
auto Size = Op.Size;
|
||||||
while (Size != 0) {
|
while (Size != 0) {
|
||||||
unsigned VTSize = VT.getSizeInBits() / 8;
|
unsigned VTSize = VT.getSizeInBits() / 8;
|
||||||
while (VTSize > Size) {
|
while (VTSize > Size) {
|
||||||
|
@ -257,8 +249,8 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
|
||||||
// If the new VT cannot cover all of the remaining bits, then consider
|
// If the new VT cannot cover all of the remaining bits, then consider
|
||||||
// issuing a (or a pair of) unaligned and overlapping load / store.
|
// issuing a (or a pair of) unaligned and overlapping load / store.
|
||||||
bool Fast;
|
bool Fast;
|
||||||
if (NumMemOps && AllowOverlap && NewVTSize < Size &&
|
if (NumMemOps && Op.AllowOverlap && NewVTSize < Size &&
|
||||||
allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign,
|
allowsMisalignedMemoryAccesses(VT, DstAS, Op.DstAlign,
|
||||||
MachineMemOperand::MONone, &Fast) &&
|
MachineMemOperand::MONone, &Fast) &&
|
||||||
Fast)
|
Fast)
|
||||||
VTSize = Size;
|
VTSize = Size;
|
||||||
|
|
|
@ -9426,9 +9426,7 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
|
||||||
}
|
}
|
||||||
|
|
||||||
EVT AArch64TargetLowering::getOptimalMemOpType(
|
EVT AArch64TargetLowering::getOptimalMemOpType(
|
||||||
uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
|
const MemOp &Op, const AttributeList &FuncAttributes) const {
|
||||||
bool ZeroMemset, bool MemcpyStrSrc,
|
|
||||||
const AttributeList &FuncAttributes) const {
|
|
||||||
bool CanImplicitFloat =
|
bool CanImplicitFloat =
|
||||||
!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
|
!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
|
||||||
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
|
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
|
||||||
|
@ -9436,9 +9434,9 @@ EVT AArch64TargetLowering::getOptimalMemOpType(
|
||||||
// Only use AdvSIMD to implement memset of 32-byte and above. It would have
|
// Only use AdvSIMD to implement memset of 32-byte and above. It would have
|
||||||
// taken one instruction to materialize the v2i64 zero and one store (with
|
// taken one instruction to materialize the v2i64 zero and one store (with
|
||||||
// restrictive addressing mode). Just do i64 stores.
|
// restrictive addressing mode). Just do i64 stores.
|
||||||
bool IsSmallMemset = IsMemset && Size < 32;
|
bool IsSmallMemset = Op.IsMemset && Op.Size < 32;
|
||||||
auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
|
auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
|
||||||
if (memOpAlign(SrcAlign, DstAlign, AlignCheck))
|
if (memOpAlign(Op.SrcAlign, Op.DstAlign, AlignCheck))
|
||||||
return true;
|
return true;
|
||||||
bool Fast;
|
bool Fast;
|
||||||
return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,
|
return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,
|
||||||
|
@ -9446,22 +9444,20 @@ EVT AArch64TargetLowering::getOptimalMemOpType(
|
||||||
Fast;
|
Fast;
|
||||||
};
|
};
|
||||||
|
|
||||||
if (CanUseNEON && IsMemset && !IsSmallMemset &&
|
if (CanUseNEON && Op.IsMemset && !IsSmallMemset &&
|
||||||
AlignmentIsAcceptable(MVT::v2i64, 16))
|
AlignmentIsAcceptable(MVT::v2i64, 16))
|
||||||
return MVT::v2i64;
|
return MVT::v2i64;
|
||||||
if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
|
if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
|
||||||
return MVT::f128;
|
return MVT::f128;
|
||||||
if (Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
|
if (Op.Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
|
||||||
return MVT::i64;
|
return MVT::i64;
|
||||||
if (Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
|
if (Op.Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
|
||||||
return MVT::i32;
|
return MVT::i32;
|
||||||
return MVT::Other;
|
return MVT::Other;
|
||||||
}
|
}
|
||||||
|
|
||||||
LLT AArch64TargetLowering::getOptimalMemOpLLT(
|
LLT AArch64TargetLowering::getOptimalMemOpLLT(
|
||||||
uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
|
const MemOp &Op, const AttributeList &FuncAttributes) const {
|
||||||
bool ZeroMemset, bool MemcpyStrSrc,
|
|
||||||
const AttributeList &FuncAttributes) const {
|
|
||||||
bool CanImplicitFloat =
|
bool CanImplicitFloat =
|
||||||
!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
|
!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
|
||||||
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
|
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
|
||||||
|
@ -9469,9 +9465,9 @@ LLT AArch64TargetLowering::getOptimalMemOpLLT(
|
||||||
// Only use AdvSIMD to implement memset of 32-byte and above. It would have
|
// Only use AdvSIMD to implement memset of 32-byte and above. It would have
|
||||||
// taken one instruction to materialize the v2i64 zero and one store (with
|
// taken one instruction to materialize the v2i64 zero and one store (with
|
||||||
// restrictive addressing mode). Just do i64 stores.
|
// restrictive addressing mode). Just do i64 stores.
|
||||||
bool IsSmallMemset = IsMemset && Size < 32;
|
bool IsSmallMemset = Op.IsMemset && Op.Size < 32;
|
||||||
auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
|
auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
|
||||||
if (memOpAlign(SrcAlign, DstAlign, AlignCheck))
|
if (memOpAlign(Op.SrcAlign, Op.DstAlign, AlignCheck))
|
||||||
return true;
|
return true;
|
||||||
bool Fast;
|
bool Fast;
|
||||||
return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,
|
return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,
|
||||||
|
@ -9479,14 +9475,14 @@ LLT AArch64TargetLowering::getOptimalMemOpLLT(
|
||||||
Fast;
|
Fast;
|
||||||
};
|
};
|
||||||
|
|
||||||
if (CanUseNEON && IsMemset && !IsSmallMemset &&
|
if (CanUseNEON && Op.IsMemset && !IsSmallMemset &&
|
||||||
AlignmentIsAcceptable(MVT::v2i64, 16))
|
AlignmentIsAcceptable(MVT::v2i64, 16))
|
||||||
return LLT::vector(2, 64);
|
return LLT::vector(2, 64);
|
||||||
if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
|
if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
|
||||||
return LLT::scalar(128);
|
return LLT::scalar(128);
|
||||||
if (Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
|
if (Op.Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
|
||||||
return LLT::scalar(64);
|
return LLT::scalar(64);
|
||||||
if (Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
|
if (Op.Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
|
||||||
return LLT::scalar(32);
|
return LLT::scalar(32);
|
||||||
return LLT();
|
return LLT();
|
||||||
}
|
}
|
||||||
|
|
|
@ -428,12 +428,10 @@ public:
|
||||||
|
|
||||||
bool shouldConsiderGEPOffsetSplit() const override;
|
bool shouldConsiderGEPOffsetSplit() const override;
|
||||||
|
|
||||||
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
|
EVT getOptimalMemOpType(const MemOp &Op,
|
||||||
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
|
|
||||||
const AttributeList &FuncAttributes) const override;
|
const AttributeList &FuncAttributes) const override;
|
||||||
|
|
||||||
LLT getOptimalMemOpLLT(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
|
LLT getOptimalMemOpLLT(const MemOp &Op,
|
||||||
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
|
|
||||||
const AttributeList &FuncAttributes) const override;
|
const AttributeList &FuncAttributes) const override;
|
||||||
|
|
||||||
/// Return true if the addressing mode represented by AM is legal for this
|
/// Return true if the addressing mode represented by AM is legal for this
|
||||||
|
|
|
@ -1320,18 +1320,16 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
|
||||||
}
|
}
|
||||||
|
|
||||||
EVT SITargetLowering::getOptimalMemOpType(
|
EVT SITargetLowering::getOptimalMemOpType(
|
||||||
uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
|
const MemOp &Op, const AttributeList &FuncAttributes) const {
|
||||||
bool ZeroMemset, bool MemcpyStrSrc,
|
|
||||||
const AttributeList &FuncAttributes) const {
|
|
||||||
// FIXME: Should account for address space here.
|
// FIXME: Should account for address space here.
|
||||||
|
|
||||||
// The default fallback uses the private pointer size as a guess for a type to
|
// The default fallback uses the private pointer size as a guess for a type to
|
||||||
// use. Make sure we switch these to 64-bit accesses.
|
// use. Make sure we switch these to 64-bit accesses.
|
||||||
|
|
||||||
if (Size >= 16 && DstAlign >= 4) // XXX: Should only do for global
|
if (Op.Size >= 16 && Op.DstAlign >= 4) // XXX: Should only do for global
|
||||||
return MVT::v4i32;
|
return MVT::v4i32;
|
||||||
|
|
||||||
if (Size >= 8 && DstAlign >= 4)
|
if (Op.Size >= 8 && Op.DstAlign >= 4)
|
||||||
return MVT::v2i32;
|
return MVT::v2i32;
|
||||||
|
|
||||||
// Use the default.
|
// Use the default.
|
||||||
|
|
|
@ -257,10 +257,7 @@ public:
|
||||||
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
|
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
|
||||||
bool *IsFast = nullptr) const override;
|
bool *IsFast = nullptr) const override;
|
||||||
|
|
||||||
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
|
EVT getOptimalMemOpType(const MemOp &Op,
|
||||||
unsigned SrcAlign, bool IsMemset,
|
|
||||||
bool ZeroMemset,
|
|
||||||
bool MemcpyStrSrc,
|
|
||||||
const AttributeList &FuncAttributes) const override;
|
const AttributeList &FuncAttributes) const override;
|
||||||
|
|
||||||
bool isMemOpUniform(const SDNode *N) const;
|
bool isMemOpUniform(const SDNode *N) const;
|
||||||
|
|
|
@ -14961,21 +14961,19 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
|
||||||
}
|
}
|
||||||
|
|
||||||
EVT ARMTargetLowering::getOptimalMemOpType(
|
EVT ARMTargetLowering::getOptimalMemOpType(
|
||||||
uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
|
const MemOp &Op, const AttributeList &FuncAttributes) const {
|
||||||
bool ZeroMemset, bool MemcpyStrSrc,
|
|
||||||
const AttributeList &FuncAttributes) const {
|
|
||||||
// See if we can use NEON instructions for this...
|
// See if we can use NEON instructions for this...
|
||||||
if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() &&
|
if ((!Op.IsMemset || Op.ZeroMemset) && Subtarget->hasNEON() &&
|
||||||
!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
|
!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
|
||||||
bool Fast;
|
bool Fast;
|
||||||
if (Size >= 16 &&
|
if (Op.Size >= 16 &&
|
||||||
(memOpAlign(SrcAlign, DstAlign, 16) ||
|
(memOpAlign(Op.SrcAlign, Op.DstAlign, 16) ||
|
||||||
(allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1,
|
(allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1,
|
||||||
MachineMemOperand::MONone, &Fast) &&
|
MachineMemOperand::MONone, &Fast) &&
|
||||||
Fast))) {
|
Fast))) {
|
||||||
return MVT::v2f64;
|
return MVT::v2f64;
|
||||||
} else if (Size >= 8 &&
|
} else if (Op.Size >= 8 &&
|
||||||
(memOpAlign(SrcAlign, DstAlign, 8) ||
|
(memOpAlign(Op.SrcAlign, Op.DstAlign, 8) ||
|
||||||
(allowsMisalignedMemoryAccesses(
|
(allowsMisalignedMemoryAccesses(
|
||||||
MVT::f64, 0, 1, MachineMemOperand::MONone, &Fast) &&
|
MVT::f64, 0, 1, MachineMemOperand::MONone, &Fast) &&
|
||||||
Fast))) {
|
Fast))) {
|
||||||
|
|
|
@ -347,10 +347,7 @@ class VectorType;
|
||||||
MachineMemOperand::Flags Flags,
|
MachineMemOperand::Flags Flags,
|
||||||
bool *Fast) const override;
|
bool *Fast) const override;
|
||||||
|
|
||||||
EVT getOptimalMemOpType(uint64_t Size,
|
EVT getOptimalMemOpType(const MemOp &Op,
|
||||||
unsigned DstAlign, unsigned SrcAlign,
|
|
||||||
bool IsMemset, bool ZeroMemset,
|
|
||||||
bool MemcpyStrSrc,
|
|
||||||
const AttributeList &FuncAttributes) const override;
|
const AttributeList &FuncAttributes) const override;
|
||||||
|
|
||||||
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
|
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
|
||||||
|
|
|
@ -576,8 +576,9 @@ int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
|
||||||
// loaded and stored. That's why we multiply the number of elements by 2 to
|
// loaded and stored. That's why we multiply the number of elements by 2 to
|
||||||
// get the cost for this memcpy.
|
// get the cost for this memcpy.
|
||||||
if (getTLI()->findOptimalMemOpLowering(
|
if (getTLI()->findOptimalMemOpLowering(
|
||||||
MemOps, Limit, Size, DstAlign, SrcAlign, false /*IsMemset*/,
|
MemOps, Limit,
|
||||||
false /*ZeroMemset*/, false /*MemcpyStrSrc*/, false /*AllowOverlap*/,
|
MemOp::Copy(Size, /*DstAlignCanChange*/ false, DstAlign, SrcAlign,
|
||||||
|
/*IsVolatile*/ true),
|
||||||
MI->getDestAddressSpace(), MI->getSourceAddressSpace(),
|
MI->getDestAddressSpace(), MI->getSourceAddressSpace(),
|
||||||
F->getAttributes()))
|
F->getAttributes()))
|
||||||
return MemOps.size() * 2;
|
return MemOps.size() * 2;
|
||||||
|
|
|
@ -99,10 +99,9 @@ private:
|
||||||
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
|
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
|
||||||
SelectionDAG &DAG) const override;
|
SelectionDAG &DAG) const override;
|
||||||
|
|
||||||
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
|
EVT getOptimalMemOpType(const MemOp &Op,
|
||||||
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
|
|
||||||
const AttributeList &FuncAttributes) const override {
|
const AttributeList &FuncAttributes) const override {
|
||||||
return Size >= 8 ? MVT::i64 : MVT::i32;
|
return Op.Size >= 8 ? MVT::i64 : MVT::i32;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
|
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
|
||||||
|
|
|
@ -3379,19 +3379,21 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
|
||||||
/// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
|
/// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
|
||||||
/// does not need to be loaded. It returns MVT::Other if the type should be
|
/// does not need to be loaded. It returns MVT::Other if the type should be
|
||||||
/// determined using generic target-independent logic.
|
/// determined using generic target-independent logic.
|
||||||
EVT HexagonTargetLowering::getOptimalMemOpType(uint64_t Size,
|
EVT HexagonTargetLowering::getOptimalMemOpType(
|
||||||
unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset,
|
const MemOp &Op, const AttributeList &FuncAttributes) const {
|
||||||
bool MemcpyStrSrc, const AttributeList &FuncAttributes) const {
|
|
||||||
|
|
||||||
auto Aligned = [](unsigned GivenA, unsigned MinA) -> bool {
|
auto Aligned = [](unsigned GivenA, unsigned MinA) -> bool {
|
||||||
return (GivenA % MinA) == 0;
|
return (GivenA % MinA) == 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
if (Size >= 8 && Aligned(DstAlign, 8) && (IsMemset || Aligned(SrcAlign, 8)))
|
if (Op.Size >= 8 && Aligned(Op.DstAlign, 8) &&
|
||||||
|
(Op.IsMemset || Aligned(Op.SrcAlign, 8)))
|
||||||
return MVT::i64;
|
return MVT::i64;
|
||||||
if (Size >= 4 && Aligned(DstAlign, 4) && (IsMemset || Aligned(SrcAlign, 4)))
|
if (Op.Size >= 4 && Aligned(Op.DstAlign, 4) &&
|
||||||
|
(Op.IsMemset || Aligned(Op.SrcAlign, 4)))
|
||||||
return MVT::i32;
|
return MVT::i32;
|
||||||
if (Size >= 2 && Aligned(DstAlign, 2) && (IsMemset || Aligned(SrcAlign, 2)))
|
if (Op.Size >= 2 && Aligned(Op.DstAlign, 2) &&
|
||||||
|
(Op.IsMemset || Aligned(Op.SrcAlign, 2)))
|
||||||
return MVT::i16;
|
return MVT::i16;
|
||||||
|
|
||||||
return MVT::Other;
|
return MVT::Other;
|
||||||
|
|
|
@ -302,8 +302,7 @@ namespace HexagonISD {
|
||||||
/// the immediate into a register.
|
/// the immediate into a register.
|
||||||
bool isLegalICmpImmediate(int64_t Imm) const override;
|
bool isLegalICmpImmediate(int64_t Imm) const override;
|
||||||
|
|
||||||
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
|
EVT getOptimalMemOpType(const MemOp &Op,
|
||||||
unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
|
|
||||||
const AttributeList &FuncAttributes) const override;
|
const AttributeList &FuncAttributes) const override;
|
||||||
|
|
||||||
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
|
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
|
||||||
|
|
|
@ -4269,9 +4269,7 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
EVT MipsTargetLowering::getOptimalMemOpType(
|
EVT MipsTargetLowering::getOptimalMemOpType(
|
||||||
uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
|
const MemOp &Op, const AttributeList &FuncAttributes) const {
|
||||||
bool ZeroMemset, bool MemcpyStrSrc,
|
|
||||||
const AttributeList &FuncAttributes) const {
|
|
||||||
if (Subtarget.hasMips64())
|
if (Subtarget.hasMips64())
|
||||||
return MVT::i64;
|
return MVT::i64;
|
||||||
|
|
||||||
|
|
|
@ -669,10 +669,7 @@ class TargetRegisterClass;
|
||||||
|
|
||||||
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
|
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
|
||||||
|
|
||||||
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
|
EVT getOptimalMemOpType(const MemOp &Op,
|
||||||
unsigned SrcAlign,
|
|
||||||
bool IsMemset, bool ZeroMemset,
|
|
||||||
bool MemcpyStrSrc,
|
|
||||||
const AttributeList &FuncAttributes) const override;
|
const AttributeList &FuncAttributes) const override;
|
||||||
|
|
||||||
/// isFPImmLegal - Returns true if the target can instruction select the
|
/// isFPImmLegal - Returns true if the target can instruction select the
|
||||||
|
|
|
@ -15069,35 +15069,27 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// getOptimalMemOpType - Returns the target specific optimal type for load
|
|
||||||
/// and store operations as a result of memset, memcpy, and memmove
|
|
||||||
/// lowering. If DstAlign is zero that means the destination
|
|
||||||
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
|
|
||||||
/// means there isn't a need to check it against alignment requirement,
|
|
||||||
/// probably because the source does not need to be loaded. If 'IsMemset' is
|
|
||||||
/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
|
|
||||||
/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
|
|
||||||
/// source is constant so it does not need to be loaded.
|
|
||||||
/// It returns MVT::Other if the type should be determined using generic
|
/// It returns MVT::Other if the type should be determined using generic
|
||||||
/// target-independent logic.
|
/// target-independent logic.
|
||||||
EVT PPCTargetLowering::getOptimalMemOpType(
|
EVT PPCTargetLowering::getOptimalMemOpType(
|
||||||
uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
|
const MemOp &Op, const AttributeList &FuncAttributes) const {
|
||||||
bool ZeroMemset, bool MemcpyStrSrc,
|
|
||||||
const AttributeList &FuncAttributes) const {
|
|
||||||
if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
|
if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
|
||||||
// When expanding a memset, require at least two QPX instructions to cover
|
// When expanding a memset, require at least two QPX instructions to cover
|
||||||
// the cost of loading the value to be stored from the constant pool.
|
// the cost of loading the value to be stored from the constant pool.
|
||||||
if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
|
if (Subtarget.hasQPX() && Op.Size >= 32 &&
|
||||||
(!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
|
(!Op.IsMemset || Op.Size >= 64) &&
|
||||||
|
(!Op.SrcAlign || Op.SrcAlign >= 32) &&
|
||||||
|
(!Op.DstAlign || Op.DstAlign >= 32) &&
|
||||||
!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
|
!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
|
||||||
return MVT::v4f64;
|
return MVT::v4f64;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We should use Altivec/VSX loads and stores when available. For unaligned
|
// We should use Altivec/VSX loads and stores when available. For unaligned
|
||||||
// addresses, unaligned VSX loads are only fast starting with the P8.
|
// addresses, unaligned VSX loads are only fast starting with the P8.
|
||||||
if (Subtarget.hasAltivec() && Size >= 16 &&
|
if (Subtarget.hasAltivec() && Op.Size >= 16 &&
|
||||||
(((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
|
(((!Op.SrcAlign || Op.SrcAlign >= 16) &&
|
||||||
((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
|
(!Op.DstAlign || Op.DstAlign >= 16)) ||
|
||||||
|
((Op.IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
|
||||||
return MVT::v4i32;
|
return MVT::v4i32;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -892,20 +892,9 @@ namespace llvm {
|
||||||
MachineFunction &MF,
|
MachineFunction &MF,
|
||||||
unsigned Intrinsic) const override;
|
unsigned Intrinsic) const override;
|
||||||
|
|
||||||
/// getOptimalMemOpType - Returns the target specific optimal type for load
|
|
||||||
/// and store operations as a result of memset, memcpy, and memmove
|
|
||||||
/// lowering. If DstAlign is zero that means the destination
|
|
||||||
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
|
|
||||||
/// means there isn't a need to check it against alignment requirement,
|
|
||||||
/// probably because the source does not need to be loaded. If 'IsMemset' is
|
|
||||||
/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
|
|
||||||
/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
|
|
||||||
/// source is constant so it does not need to be loaded.
|
|
||||||
/// It returns MVT::Other if the type should be determined using generic
|
/// It returns MVT::Other if the type should be determined using generic
|
||||||
/// target-independent logic.
|
/// target-independent logic.
|
||||||
EVT
|
EVT getOptimalMemOpType(const MemOp &Op,
|
||||||
getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
|
|
||||||
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
|
|
||||||
const AttributeList &FuncAttributes) const override;
|
const AttributeList &FuncAttributes) const override;
|
||||||
|
|
||||||
/// Is unaligned memory access allowed for the given type, and is it fast
|
/// Is unaligned memory access allowed for the given type, and is it fast
|
||||||
|
|
|
@ -2245,34 +2245,23 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
|
||||||
return Align;
|
return Align;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the target specific optimal type for load
|
|
||||||
/// and store operations as a result of memset, memcpy, and memmove
|
|
||||||
/// lowering. If DstAlign is zero that means the destination
|
|
||||||
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
|
|
||||||
/// means there isn't a need to check it against alignment requirement,
|
|
||||||
/// probably because the source does not need to be loaded. If 'IsMemset' is
|
|
||||||
/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
|
|
||||||
/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
|
|
||||||
/// source is constant so it does not need to be loaded.
|
|
||||||
/// It returns MVT::Other if the type should be determined using generic
|
/// It returns MVT::Other if the type should be determined using generic
|
||||||
/// target-independent logic.
|
/// target-independent logic.
|
||||||
/// For vector ops we check that the overall size isn't larger than our
|
/// For vector ops we check that the overall size isn't larger than our
|
||||||
/// preferred vector width.
|
/// preferred vector width.
|
||||||
EVT X86TargetLowering::getOptimalMemOpType(
|
EVT X86TargetLowering::getOptimalMemOpType(
|
||||||
uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
|
const MemOp &Op, const AttributeList &FuncAttributes) const {
|
||||||
bool ZeroMemset, bool MemcpyStrSrc,
|
|
||||||
const AttributeList &FuncAttributes) const {
|
|
||||||
if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
|
if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
|
||||||
if (Size >= 16 && (!Subtarget.isUnalignedMem16Slow() ||
|
if (Op.Size >= 16 && (!Subtarget.isUnalignedMem16Slow() ||
|
||||||
((DstAlign == 0 || DstAlign >= 16) &&
|
((Op.DstAlign == 0 || Op.DstAlign >= 16) &&
|
||||||
(SrcAlign == 0 || SrcAlign >= 16)))) {
|
(Op.SrcAlign == 0 || Op.SrcAlign >= 16)))) {
|
||||||
// FIXME: Check if unaligned 64-byte accesses are slow.
|
// FIXME: Check if unaligned 64-byte accesses are slow.
|
||||||
if (Size >= 64 && Subtarget.hasAVX512() &&
|
if (Op.Size >= 64 && Subtarget.hasAVX512() &&
|
||||||
(Subtarget.getPreferVectorWidth() >= 512)) {
|
(Subtarget.getPreferVectorWidth() >= 512)) {
|
||||||
return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
|
return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
|
||||||
}
|
}
|
||||||
// FIXME: Check if unaligned 32-byte accesses are slow.
|
// FIXME: Check if unaligned 32-byte accesses are slow.
|
||||||
if (Size >= 32 && Subtarget.hasAVX() &&
|
if (Op.Size >= 32 && Subtarget.hasAVX() &&
|
||||||
(Subtarget.getPreferVectorWidth() >= 256)) {
|
(Subtarget.getPreferVectorWidth() >= 256)) {
|
||||||
// Although this isn't a well-supported type for AVX1, we'll let
|
// Although this isn't a well-supported type for AVX1, we'll let
|
||||||
// legalization and shuffle lowering produce the optimal codegen. If we
|
// legalization and shuffle lowering produce the optimal codegen. If we
|
||||||
|
@ -2288,8 +2277,8 @@ EVT X86TargetLowering::getOptimalMemOpType(
|
||||||
if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
|
if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
|
||||||
(Subtarget.getPreferVectorWidth() >= 128))
|
(Subtarget.getPreferVectorWidth() >= 128))
|
||||||
return MVT::v4f32;
|
return MVT::v4f32;
|
||||||
} else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
|
} else if ((!Op.IsMemset || Op.ZeroMemset) && !Op.MemcpyStrSrc &&
|
||||||
!Subtarget.is64Bit() && Subtarget.hasSSE2()) {
|
Op.Size >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
|
||||||
// Do not use f64 to lower memcpy if source is string constant. It's
|
// Do not use f64 to lower memcpy if source is string constant. It's
|
||||||
// better to use i32 to avoid the loads.
|
// better to use i32 to avoid the loads.
|
||||||
// Also, do not use f64 to lower memset unless this is a memset of zeros.
|
// Also, do not use f64 to lower memset unless this is a memset of zeros.
|
||||||
|
@ -2302,7 +2291,7 @@ EVT X86TargetLowering::getOptimalMemOpType(
|
||||||
// This is a compromise. If we reach here, unaligned accesses may be slow on
|
// This is a compromise. If we reach here, unaligned accesses may be slow on
|
||||||
// this target. However, creating smaller, aligned accesses could be even
|
// this target. However, creating smaller, aligned accesses could be even
|
||||||
// slower and would certainly be a lot more code.
|
// slower and would certainly be a lot more code.
|
||||||
if (Subtarget.is64Bit() && Size >= 8)
|
if (Subtarget.is64Bit() && Op.Size >= 8)
|
||||||
return MVT::i64;
|
return MVT::i64;
|
||||||
return MVT::i32;
|
return MVT::i32;
|
||||||
}
|
}
|
||||||
|
|
|
@ -758,19 +758,7 @@ namespace llvm {
|
||||||
unsigned getByValTypeAlignment(Type *Ty,
|
unsigned getByValTypeAlignment(Type *Ty,
|
||||||
const DataLayout &DL) const override;
|
const DataLayout &DL) const override;
|
||||||
|
|
||||||
/// Returns the target specific optimal type for load
|
EVT getOptimalMemOpType(const MemOp &Op,
|
||||||
/// and store operations as a result of memset, memcpy, and memmove
|
|
||||||
/// lowering. If DstAlign is zero that means the destination
|
|
||||||
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
|
|
||||||
/// means there isn't a need to check it against alignment requirement,
|
|
||||||
/// probably because the source does not need to be loaded. If 'IsMemset' is
|
|
||||||
/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
|
|
||||||
/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
|
|
||||||
/// source is constant so it does not need to be loaded.
|
|
||||||
/// It returns MVT::Other if the type should be determined using generic
|
|
||||||
/// target-independent logic.
|
|
||||||
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
|
|
||||||
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
|
|
||||||
const AttributeList &FuncAttributes) const override;
|
const AttributeList &FuncAttributes) const override;
|
||||||
|
|
||||||
/// Returns true if it's safe to use load / store of the
|
/// Returns true if it's safe to use load / store of the
|
||||||
|
|
Loading…
Reference in New Issue