[NFC] Introduce a type to model memory operation

Summary: This is a first step before changing the types to llvm::Align and introducing functions to ease client code.

Reviewers: courbet

Subscribers: arsenm, sdardis, nemanjai, jvesely, nhaehnle, hiraditya, kbarton, jrtc27, atanasyan, jsji, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D73785
Guillaume Chatelet 2020-01-31 15:40:31 +01:00
parent edc3f4f02e
commit 3c89b75f23
20 changed files with 164 additions and 223 deletions


@@ -106,6 +106,49 @@ namespace Sched {
 } // end namespace Sched

+// MemOp models a memory operation, either memset or memcpy/memmove.
+struct MemOp {
+  // Shared
+  uint64_t Size;
+  unsigned DstAlign; // Specified alignment of the memory operation, or zero if
+                     // the destination alignment can satisfy any constraint.
+  bool AllowOverlap;
+  // memset only
+  bool IsMemset;   // If set, this memory operation is a memset.
+  bool ZeroMemset; // If set, clears out memory with zeros.
+  // memcpy only
+  bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register
+                     // constant so it does not need to be loaded.
+  unsigned SrcAlign; // Inferred alignment of the source, or zero if the memory
+                     // operation does not need to load the value.
+
+  static MemOp Copy(uint64_t Size, bool DstAlignCanChange, unsigned DstAlign,
+                    unsigned SrcAlign, bool IsVolatile,
+                    bool MemcpyStrSrc = false) {
+    return {
+        /*.Size =*/Size,
+        /*.DstAlign =*/DstAlignCanChange ? 0 : DstAlign,
+        /*.AllowOverlap =*/!IsVolatile,
+        /*.IsMemset =*/false,
+        /*.ZeroMemset =*/false,
+        /*.MemcpyStrSrc =*/MemcpyStrSrc,
+        /*.SrcAlign =*/SrcAlign,
+    };
+  }
+
+  static MemOp Set(uint64_t Size, bool DstAlignCanChange, unsigned DstAlign,
+                   bool IsZeroMemset, bool IsVolatile) {
+    return {
+        /*.Size =*/Size,
+        /*.DstAlign =*/DstAlignCanChange ? 0 : DstAlign,
+        /*.AllowOverlap =*/!IsVolatile,
+        /*.IsMemset =*/true,
+        /*.ZeroMemset =*/IsZeroMemset,
+        /*.MemcpyStrSrc =*/false,
+        /*.SrcAlign =*/0,
+    };
+  }
+};
+
 /// This base class for TargetLowering contains the SelectionDAG-independent
 /// parts that can be used from the rest of CodeGen.
 class TargetLoweringBase {
@@ -1518,29 +1561,17 @@ public:
   /// Returns the target specific optimal type for load and store operations as
   /// a result of memset, memcpy, and memmove lowering.
-  ///
-  /// If DstAlign is zero that means it's safe to destination alignment can
-  /// satisfy any constraint. Similarly if SrcAlign is zero it means there isn't
-  /// a need to check it against alignment requirement, probably because the
-  /// source does not need to be loaded. If 'IsMemset' is true, that means it's
-  /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
-  /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
-  /// does not need to be loaded. It returns EVT::Other if the type should be
-  /// determined using generic target-independent logic.
+  /// It returns EVT::Other if the type should be determined using generic
+  /// target-independent logic.
   virtual EVT
-  getOptimalMemOpType(uint64_t /*Size*/, unsigned /*DstAlign*/,
-                      unsigned /*SrcAlign*/, bool /*IsMemset*/,
-                      bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/,
+  getOptimalMemOpType(const MemOp &Op,
                       const AttributeList & /*FuncAttributes*/) const {
     return MVT::Other;
   }

   /// LLT returning variant.
   virtual LLT
-  getOptimalMemOpLLT(uint64_t /*Size*/, unsigned /*DstAlign*/,
-                     unsigned /*SrcAlign*/, bool /*IsMemset*/,
-                     bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/,
+  getOptimalMemOpLLT(const MemOp &Op,
                      const AttributeList & /*FuncAttributes*/) const {
     return LLT();
   }
@@ -3102,14 +3133,8 @@ public:
   /// Return true if the number of memory ops is below the threshold (Limit).
   /// It returns the types of the sequence of memory ops to perform
   /// memset / memcpy by reference.
-  bool findOptimalMemOpLowering(std::vector<EVT> &MemOps,
-                                unsigned Limit, uint64_t Size,
-                                unsigned DstAlign, unsigned SrcAlign,
-                                bool IsMemset,
-                                bool ZeroMemset,
-                                bool MemcpyStrSrc,
-                                bool AllowOverlap,
-                                unsigned DstAS, unsigned SrcAS,
+  bool findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
+                                const MemOp &Op, unsigned DstAS, unsigned SrcAS,
                                 const AttributeList &FuncAttributes) const;

   /// Check to see if the specified operand of the specified instruction is a
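The new factory functions above are the intended entry points for client code. A minimal usage sketch follows; it is not part of the diff, and the concrete values (a 32-byte copy, 8- and 4-byte alignments, a 16-byte zeroing memset) as well as the TLI and F variables are hypothetical placeholders for whatever the caller already has in scope.

  // Hypothetical caller with a TargetLowering &TLI and a Function &F in scope.
  // A 32-byte, non-volatile memcpy: the destination alignment is fixed at 8
  // bytes and the source alignment was inferred to be 4 bytes.
  MemOp CopyOp = MemOp::Copy(/*Size=*/32, /*DstAlignCanChange=*/false,
                             /*DstAlign=*/8, /*SrcAlign=*/4,
                             /*IsVolatile=*/false);

  // A 16-byte memset of zeros whose destination alignment may still be raised,
  // so DstAlign collapses to 0 ("any alignment can be satisfied").
  MemOp SetOp = MemOp::Set(/*Size=*/16, /*DstAlignCanChange=*/true,
                           /*DstAlign=*/1, /*IsZeroMemset=*/true,
                           /*IsVolatile=*/false);

  // One struct now travels where six scalar parameters used to.
  EVT CopyVT = TLI.getOptimalMemOpType(CopyOp, F.getAttributes());
  EVT SetVT = TLI.getOptimalMemOpType(SetOp, F.getAttributes());

Bundling the flags this way is also what lets a later change migrate the alignment fields to llvm::Align without touching every call site again.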


@@ -855,37 +855,30 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
 // Returns a list of types to use for memory op lowering in MemOps. A partial
 // port of findOptimalMemOpLowering in TargetLowering.
-static bool findGISelOptimalMemOpLowering(
-    std::vector<LLT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign,
-    unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
-    bool AllowOverlap, unsigned DstAS, unsigned SrcAS,
-    const AttributeList &FuncAttributes, const TargetLowering &TLI) {
-  // If 'SrcAlign' is zero, that means the memory operation does not need to
-  // load the value, i.e. memset or memcpy from constant string. Otherwise,
-  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
-  // is the specified alignment of the memory operation. If it is zero, that
-  // means it's possible to change the alignment of the destination.
-  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
-  // not need to be loaded.
-  if (SrcAlign != 0 && SrcAlign < DstAlign)
+static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
+                                          unsigned Limit, const MemOp &Op,
+                                          unsigned DstAS, unsigned SrcAS,
+                                          const AttributeList &FuncAttributes,
+                                          const TargetLowering &TLI) {
+  if (Op.SrcAlign != 0 && Op.SrcAlign < Op.DstAlign)
     return false;

-  LLT Ty = TLI.getOptimalMemOpLLT(Size, DstAlign, SrcAlign, IsMemset,
-                                  ZeroMemset, MemcpyStrSrc, FuncAttributes);
+  LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);

   if (Ty == LLT()) {
     // Use the largest scalar type whose alignment constraints are satisfied.
     // We only need to check DstAlign here as SrcAlign is always greater or
     // equal to DstAlign (or zero).
     Ty = LLT::scalar(64);
-    while (DstAlign && DstAlign < Ty.getSizeInBytes() &&
-           !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, DstAlign))
+    while (Op.DstAlign && Op.DstAlign < Ty.getSizeInBytes() &&
+           !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.DstAlign))
       Ty = LLT::scalar(Ty.getSizeInBytes());
     assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
     // FIXME: check for the largest legal type we can load/store to.
   }

   unsigned NumMemOps = 0;
+  auto Size = Op.Size;
   while (Size != 0) {
     unsigned TySize = Ty.getSizeInBytes();
     while (TySize > Size) {

@@ -904,9 +897,9 @@ static bool findGISelOptimalMemOpLowering(
       bool Fast;
       // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
       MVT VT = getMVTForLLT(Ty);
-      if (NumMemOps && AllowOverlap && NewTySize < Size &&
+      if (NumMemOps && Op.AllowOverlap && NewTySize < Size &&
           TLI.allowsMisalignedMemoryAccesses(
-              VT, DstAS, DstAlign, MachineMemOperand::MONone, &Fast) &&
+              VT, DstAS, Op.DstAlign, MachineMemOperand::MONone, &Fast) &&
           Fast)
         TySize = Size;
       else {

@@ -988,11 +981,12 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val
   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
   bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;

-  if (!findGISelOptimalMemOpLowering(
-          MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Align), 0,
-          /*IsMemset=*/true,
-          /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
-          /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), ~0u,
+  if (!findGISelOptimalMemOpLowering(MemOps, Limit,
+                                     MemOp::Set(KnownLen, DstAlignCanChange,
+                                                Align,
+                                                /*IsZeroMemset=*/IsZeroVal,
+                                                /*IsVolatile=*/IsVolatile),
+                                     DstPtrInfo.getAddrSpace(), ~0u,
           MF.getFunction().getAttributes(), TLI))
     return false;

@@ -1107,12 +1101,11 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

   if (!findGISelOptimalMemOpLowering(
-          MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
-          SrcAlign,
-          /*IsMemset=*/false,
-          /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
-          /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(),
-          SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
+          MemOps, Limit,
+          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
+                      IsVolatile),
+          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+          MF.getFunction().getAttributes(), TLI))
     return false;

   if (DstAlignCanChange) {

@@ -1214,12 +1207,11 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
   // to a bug in it's findOptimalMemOpLowering implementation. For now do the
   // same thing here.
   if (!findGISelOptimalMemOpLowering(
-          MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
-          SrcAlign,
-          /*IsMemset=*/false,
-          /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
-          /*AllowOverlap=*/false, DstPtrInfo.getAddrSpace(),
-          SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
+          MemOps, Limit,
+          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
+                      /*IsVolatile*/ true),
+          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+          MF.getFunction().getAttributes(), TLI))
     return false;

   if (DstAlignCanChange) {


@@ -5908,13 +5908,12 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
   bool CopyFromConstant = isMemSrcFromConstant(Src, Slice);
   bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
   unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
   if (!TLI.findOptimalMemOpLowering(
-          MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment),
-          (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false,
-          /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant,
-          /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(),
-          SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes()))
+          MemOps, Limit,
+          MemOp::Copy(Size, DstAlignCanChange, Alignment,
+                      isZeroConstant ? 0 : SrcAlign, isVol, CopyFromConstant),
+          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+          MF.getFunction().getAttributes()))
     return SDValue();

   if (DstAlignCanChange) {

@@ -6088,14 +6087,11 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
   if (Align > SrcAlign)
     SrcAlign = Align;
   unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
-  // FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in
-  // findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the
-  // correct code.
-  bool AllowOverlap = false;
   if (!TLI.findOptimalMemOpLowering(
-          MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), SrcAlign,
-          /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
-          AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+          MemOps, Limit,
+          MemOp::Copy(Size, DstAlignCanChange, Align, SrcAlign,
+                      /*IsVolatile*/ true),
+          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
           MF.getFunction().getAttributes()))
     return SDValue();

@@ -6193,11 +6189,9 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
   bool IsZeroVal =
       isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
   if (!TLI.findOptimalMemOpLowering(
-          MemOps, TLI.getMaxStoresPerMemset(OptSize), Size,
-          (DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true,
-          /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
-          /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u,
-          MF.getFunction().getAttributes()))
+          MemOps, TLI.getMaxStoresPerMemset(OptSize),
+          MemOp::Set(Size, DstAlignCanChange, Align, IsZeroVal, isVol),
+          DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes()))
     return SDValue();

   if (DstAlignCanChange) {


@@ -176,16 +176,9 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
   return LowerCallTo(CLI);
 }

-bool
-TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
-                                         unsigned Limit, uint64_t Size,
-                                         unsigned DstAlign, unsigned SrcAlign,
-                                         bool IsMemset,
-                                         bool ZeroMemset,
-                                         bool MemcpyStrSrc,
-                                         bool AllowOverlap,
-                                         unsigned DstAS, unsigned SrcAS,
-                                         const AttributeList &FuncAttributes) const {
+bool TargetLowering::findOptimalMemOpLowering(
+    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
+    unsigned SrcAS, const AttributeList &FuncAttributes) const {
   // If 'SrcAlign' is zero, that means the memory operation does not need to
   // load the value, i.e. memset or memcpy from constant string. Otherwise,
   // it's the inferred alignment of the source. 'DstAlign', on the other hand,

@@ -193,20 +186,18 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
   // means it's possible to change the alignment of the destination.
   // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
   // not need to be loaded.
-  if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
+  if (!(Op.SrcAlign == 0 || Op.SrcAlign >= Op.DstAlign))
     return false;

-  EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
-                               IsMemset, ZeroMemset, MemcpyStrSrc,
-                               FuncAttributes);
+  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

   if (VT == MVT::Other) {
     // Use the largest integer type whose alignment constraints are satisfied.
     // We only need to check DstAlign here as SrcAlign is always greater or
     // equal to DstAlign (or zero).
     VT = MVT::i64;
-    while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
-           !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
+    while (Op.DstAlign && Op.DstAlign < VT.getSizeInBits() / 8 &&
+           !allowsMisalignedMemoryAccesses(VT, DstAS, Op.DstAlign))
       VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
     assert(VT.isInteger());

@@ -223,6 +214,7 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
   }

   unsigned NumMemOps = 0;
+  auto Size = Op.Size;
   while (Size != 0) {
     unsigned VTSize = VT.getSizeInBits() / 8;
     while (VTSize > Size) {

@@ -257,8 +249,8 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
       // If the new VT cannot cover all of the remaining bits, then consider
       // issuing a (or a pair of) unaligned and overlapping load / store.
       bool Fast;
-      if (NumMemOps && AllowOverlap && NewVTSize < Size &&
-          allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign,
+      if (NumMemOps && Op.AllowOverlap && NewVTSize < Size &&
+          allowsMisalignedMemoryAccesses(VT, DstAS, Op.DstAlign,
                                          MachineMemOperand::MONone, &Fast) &&
           Fast)
         VTSize = Size;
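As a worked illustration of the decomposition loop above, consider Op.Size = 15 on a target whose fallback leaves VT = MVT::i64 and that reports fast misaligned 64-bit stores. This is a sketch of the intended behaviour under those assumptions, not compiler output.

  // Op.Size = 15, VT = i64 (8 bytes):
  //   iteration 1: VTSize = 8 <= 15, push i64, Size = 7, NumMemOps = 1
  //   iteration 2: VTSize = 8 > 7, so VT would shrink toward i32/i16/i8, but:
  //     * with Op.AllowOverlap and a fast misaligned access, VTSize is set to
  //       Size instead, and a second i64 is emitted that overlaps the first
  //       store by one byte             -> MemOps = {i64, i64}
  //     * without overlap, the scalar ladder is used
  //                                      -> MemOps = {i64, i32, i16, i8}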


@@ -9426,9 +9426,7 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
 }

 EVT AArch64TargetLowering::getOptimalMemOpType(
-    uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
-    bool ZeroMemset, bool MemcpyStrSrc,
-    const AttributeList &FuncAttributes) const {
+    const MemOp &Op, const AttributeList &FuncAttributes) const {
   bool CanImplicitFloat =
       !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
   bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;

@@ -9436,9 +9434,9 @@ EVT AArch64TargetLowering::getOptimalMemOpType(
   // Only use AdvSIMD to implement memset of 32-byte and above. It would have
   // taken one instruction to materialize the v2i64 zero and one store (with
   // restrictive addressing mode). Just do i64 stores.
-  bool IsSmallMemset = IsMemset && Size < 32;
+  bool IsSmallMemset = Op.IsMemset && Op.Size < 32;
   auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
-    if (memOpAlign(SrcAlign, DstAlign, AlignCheck))
+    if (memOpAlign(Op.SrcAlign, Op.DstAlign, AlignCheck))
       return true;
     bool Fast;
     return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,

@@ -9446,22 +9444,20 @@ EVT AArch64TargetLowering::getOptimalMemOpType(
                                            Fast;
   };

-  if (CanUseNEON && IsMemset && !IsSmallMemset &&
+  if (CanUseNEON && Op.IsMemset && !IsSmallMemset &&
       AlignmentIsAcceptable(MVT::v2i64, 16))
     return MVT::v2i64;
   if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
     return MVT::f128;
-  if (Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
+  if (Op.Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
     return MVT::i64;
-  if (Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
+  if (Op.Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
     return MVT::i32;
   return MVT::Other;
 }

 LLT AArch64TargetLowering::getOptimalMemOpLLT(
-    uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
-    bool ZeroMemset, bool MemcpyStrSrc,
-    const AttributeList &FuncAttributes) const {
+    const MemOp &Op, const AttributeList &FuncAttributes) const {
   bool CanImplicitFloat =
       !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
   bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;

@@ -9469,9 +9465,9 @@ LLT AArch64TargetLowering::getOptimalMemOpLLT(
   // Only use AdvSIMD to implement memset of 32-byte and above. It would have
   // taken one instruction to materialize the v2i64 zero and one store (with
   // restrictive addressing mode). Just do i64 stores.
-  bool IsSmallMemset = IsMemset && Size < 32;
+  bool IsSmallMemset = Op.IsMemset && Op.Size < 32;
   auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) {
-    if (memOpAlign(SrcAlign, DstAlign, AlignCheck))
+    if (memOpAlign(Op.SrcAlign, Op.DstAlign, AlignCheck))
       return true;
     bool Fast;
     return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,

@@ -9479,14 +9475,14 @@ LLT AArch64TargetLowering::getOptimalMemOpLLT(
                                            Fast;
   };

-  if (CanUseNEON && IsMemset && !IsSmallMemset &&
+  if (CanUseNEON && Op.IsMemset && !IsSmallMemset &&
       AlignmentIsAcceptable(MVT::v2i64, 16))
     return LLT::vector(2, 64);
   if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16))
     return LLT::scalar(128);
-  if (Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
+  if (Op.Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8))
     return LLT::scalar(64);
-  if (Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
+  if (Op.Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4))
     return LLT::scalar(32);
   return LLT();
 }
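To see how the MemOp fields drive a target heuristic, here is a hedged reading of the AArch64 code above with illustrative values. It assumes NEON is available, the function is not marked noimplicitfloat, and memOpAlign treats a zero alignment as unconstrained, as its uses here suggest.

  // MemOp::Set(/*Size=*/64, /*DstAlignCanChange=*/false, /*DstAlign=*/16,
  //            /*IsZeroMemset=*/true, /*IsVolatile=*/false)
  //   -> IsSmallMemset is false and AlignmentIsAcceptable(MVT::v2i64, 16)
  //      holds, so getOptimalMemOpType returns MVT::v2i64 and
  //      getOptimalMemOpLLT returns LLT::vector(2, 64).
  //
  // The same MemOp::Set with /*Size=*/24
  //   -> Op.IsMemset && Op.Size < 32 makes IsSmallMemset true, the vector and
  //      f128 paths are skipped, and the Op.Size >= 8 check selects MVT::i64
  //      (LLT::scalar(64) in the LLT variant).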


@@ -428,12 +428,10 @@ public:
   bool shouldConsiderGEPOffsetSplit() const override;

-  EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
-                          bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+  EVT getOptimalMemOpType(const MemOp &Op,
                           const AttributeList &FuncAttributes) const override;

-  LLT getOptimalMemOpLLT(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
-                         bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+  LLT getOptimalMemOpLLT(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

   /// Return true if the addressing mode represented by AM is legal for this


@@ -1320,18 +1320,16 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
 }

 EVT SITargetLowering::getOptimalMemOpType(
-    uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
-    bool ZeroMemset, bool MemcpyStrSrc,
-    const AttributeList &FuncAttributes) const {
+    const MemOp &Op, const AttributeList &FuncAttributes) const {
   // FIXME: Should account for address space here.

   // The default fallback uses the private pointer size as a guess for a type to
   // use. Make sure we switch these to 64-bit accesses.

-  if (Size >= 16 && DstAlign >= 4) // XXX: Should only do for global
+  if (Op.Size >= 16 && Op.DstAlign >= 4) // XXX: Should only do for global
     return MVT::v4i32;

-  if (Size >= 8 && DstAlign >= 4)
+  if (Op.Size >= 8 && Op.DstAlign >= 4)
     return MVT::v2i32;

   // Use the default.


@@ -257,10 +257,7 @@ public:
       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
       bool *IsFast = nullptr) const override;

-  EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
-                          unsigned SrcAlign, bool IsMemset,
-                          bool ZeroMemset,
-                          bool MemcpyStrSrc,
+  EVT getOptimalMemOpType(const MemOp &Op,
                           const AttributeList &FuncAttributes) const override;

   bool isMemOpUniform(const SDNode *N) const;


@@ -14961,21 +14961,19 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
 }

 EVT ARMTargetLowering::getOptimalMemOpType(
-    uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
-    bool ZeroMemset, bool MemcpyStrSrc,
-    const AttributeList &FuncAttributes) const {
+    const MemOp &Op, const AttributeList &FuncAttributes) const {
   // See if we can use NEON instructions for this...
-  if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() &&
+  if ((!Op.IsMemset || Op.ZeroMemset) && Subtarget->hasNEON() &&
       !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
     bool Fast;
-    if (Size >= 16 &&
-        (memOpAlign(SrcAlign, DstAlign, 16) ||
+    if (Op.Size >= 16 &&
+        (memOpAlign(Op.SrcAlign, Op.DstAlign, 16) ||
          (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1,
                                          MachineMemOperand::MONone, &Fast) &&
           Fast))) {
       return MVT::v2f64;
-    } else if (Size >= 8 &&
-               (memOpAlign(SrcAlign, DstAlign, 8) ||
+    } else if (Op.Size >= 8 &&
+               (memOpAlign(Op.SrcAlign, Op.DstAlign, 8) ||
                 (allowsMisalignedMemoryAccesses(
                      MVT::f64, 0, 1, MachineMemOperand::MONone, &Fast) &&
                  Fast))) {


@@ -347,10 +347,7 @@ class VectorType;
                                         MachineMemOperand::Flags Flags,
                                         bool *Fast) const override;

-    EVT getOptimalMemOpType(uint64_t Size,
-                            unsigned DstAlign, unsigned SrcAlign,
-                            bool IsMemset, bool ZeroMemset,
-                            bool MemcpyStrSrc,
+    EVT getOptimalMemOpType(const MemOp &Op,
                             const AttributeList &FuncAttributes) const override;

     bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;


@@ -576,8 +576,9 @@ int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
   // loaded and stored. That's why we multiply the number of elements by 2 to
   // get the cost for this memcpy.
   if (getTLI()->findOptimalMemOpLowering(
-          MemOps, Limit, Size, DstAlign, SrcAlign, false /*IsMemset*/,
-          false /*ZeroMemset*/, false /*MemcpyStrSrc*/, false /*AllowOverlap*/,
+          MemOps, Limit,
+          MemOp::Copy(Size, /*DstAlignCanChange*/ false, DstAlign, SrcAlign,
+                      /*IsVolatile*/ true),
           MI->getDestAddressSpace(), MI->getSourceAddressSpace(),
           F->getAttributes()))
     return MemOps.size() * 2;


@@ -99,10 +99,9 @@ private:
                       const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                       SelectionDAG &DAG) const override;

-  EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
-                          bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+  EVT getOptimalMemOpType(const MemOp &Op,
                           const AttributeList &FuncAttributes) const override {
-    return Size >= 8 ? MVT::i64 : MVT::i32;
+    return Op.Size >= 8 ? MVT::i64 : MVT::i32;
   }

   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,


@@ -3379,19 +3379,21 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
 /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
 /// does not need to be loaded. It returns EVT::Other if the type should be
 /// determined using generic target-independent logic.
-EVT HexagonTargetLowering::getOptimalMemOpType(uint64_t Size,
-      unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset,
-      bool MemcpyStrSrc, const AttributeList &FuncAttributes) const {
+EVT HexagonTargetLowering::getOptimalMemOpType(
+    const MemOp &Op, const AttributeList &FuncAttributes) const {
   auto Aligned = [](unsigned GivenA, unsigned MinA) -> bool {
     return (GivenA % MinA) == 0;
   };

-  if (Size >= 8 && Aligned(DstAlign, 8) && (IsMemset || Aligned(SrcAlign, 8)))
+  if (Op.Size >= 8 && Aligned(Op.DstAlign, 8) &&
+      (Op.IsMemset || Aligned(Op.SrcAlign, 8)))
     return MVT::i64;
-  if (Size >= 4 && Aligned(DstAlign, 4) && (IsMemset || Aligned(SrcAlign, 4)))
+  if (Op.Size >= 4 && Aligned(Op.DstAlign, 4) &&
+      (Op.IsMemset || Aligned(Op.SrcAlign, 4)))
     return MVT::i32;
-  if (Size >= 2 && Aligned(DstAlign, 2) && (IsMemset || Aligned(SrcAlign, 2)))
+  if (Op.Size >= 2 && Aligned(Op.DstAlign, 2) &&
+      (Op.IsMemset || Aligned(Op.SrcAlign, 2)))
     return MVT::i16;

   return MVT::Other;


@@ -302,8 +302,7 @@ namespace HexagonISD {
     /// the immediate into a register.
     bool isLegalICmpImmediate(int64_t Imm) const override;

-    EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
-        unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+    EVT getOptimalMemOpType(const MemOp &Op,
         const AttributeList &FuncAttributes) const override;

     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,


@@ -4269,9 +4269,7 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
 }

 EVT MipsTargetLowering::getOptimalMemOpType(
-    uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
-    bool ZeroMemset, bool MemcpyStrSrc,
-    const AttributeList &FuncAttributes) const {
+    const MemOp &Op, const AttributeList &FuncAttributes) const {
   if (Subtarget.hasMips64())
     return MVT::i64;


@@ -669,10 +669,7 @@ class TargetRegisterClass;
     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

-    EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
-                            unsigned SrcAlign,
-                            bool IsMemset, bool ZeroMemset,
-                            bool MemcpyStrSrc,
+    EVT getOptimalMemOpType(const MemOp &Op,
                             const AttributeList &FuncAttributes) const override;

     /// isFPImmLegal - Returns true if the target can instruction select the


@@ -15069,35 +15069,27 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   return false;
 }

-/// getOptimalMemOpType - Returns the target specific optimal type for load
-/// and store operations as a result of memset, memcpy, and memmove
-/// lowering. If DstAlign is zero that means it's safe to destination
-/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
-/// means there isn't a need to check it against alignment requirement,
-/// probably because the source does not need to be loaded. If 'IsMemset' is
-/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
-/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
-/// source is constant so it does not need to be loaded.
 /// It returns EVT::Other if the type should be determined using generic
 /// target-independent logic.
 EVT PPCTargetLowering::getOptimalMemOpType(
-    uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
-    bool ZeroMemset, bool MemcpyStrSrc,
-    const AttributeList &FuncAttributes) const {
+    const MemOp &Op, const AttributeList &FuncAttributes) const {
   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
     // When expanding a memset, require at least two QPX instructions to cover
     // the cost of loading the value to be stored from the constant pool.
-    if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
-        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
+    if (Subtarget.hasQPX() && Op.Size >= 32 &&
+        (!Op.IsMemset || Op.Size >= 64) &&
+        (!Op.SrcAlign || Op.SrcAlign >= 32) &&
+        (!Op.DstAlign || Op.DstAlign >= 32) &&
         !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
       return MVT::v4f64;
     }

     // We should use Altivec/VSX loads and stores when available. For unaligned
     // addresses, unaligned VSX loads are only fast starting with the P8.
-    if (Subtarget.hasAltivec() && Size >= 16 &&
-        (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
-         ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
+    if (Subtarget.hasAltivec() && Op.Size >= 16 &&
+        (((!Op.SrcAlign || Op.SrcAlign >= 16) &&
+          (!Op.DstAlign || Op.DstAlign >= 16)) ||
+         ((Op.IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
       return MVT::v4i32;
   }


@@ -892,20 +892,9 @@ namespace llvm {
                             MachineFunction &MF,
                             unsigned Intrinsic) const override;

-    /// getOptimalMemOpType - Returns the target specific optimal type for load
-    /// and store operations as a result of memset, memcpy, and memmove
-    /// lowering. If DstAlign is zero that means it's safe to destination
-    /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
-    /// means there isn't a need to check it against alignment requirement,
-    /// probably because the source does not need to be loaded. If 'IsMemset' is
-    /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
-    /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
-    /// source is constant so it does not need to be loaded.
     /// It returns EVT::Other if the type should be determined using generic
     /// target-independent logic.
-    EVT
-    getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
-                        bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+    EVT getOptimalMemOpType(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

     /// Is unaligned memory access allowed for the given type, and is it fast


@@ -2245,34 +2245,23 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
   return Align;
 }

-/// Returns the target specific optimal type for load
-/// and store operations as a result of memset, memcpy, and memmove
-/// lowering. If DstAlign is zero that means it's safe to destination
-/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
-/// means there isn't a need to check it against alignment requirement,
-/// probably because the source does not need to be loaded. If 'IsMemset' is
-/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
-/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
-/// source is constant so it does not need to be loaded.
 /// It returns EVT::Other if the type should be determined using generic
 /// target-independent logic.
 /// For vector ops we check that the overall size isn't larger than our
 /// preferred vector width.
 EVT X86TargetLowering::getOptimalMemOpType(
-    uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
-    bool ZeroMemset, bool MemcpyStrSrc,
-    const AttributeList &FuncAttributes) const {
+    const MemOp &Op, const AttributeList &FuncAttributes) const {
   if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
-    if (Size >= 16 && (!Subtarget.isUnalignedMem16Slow() ||
-                       ((DstAlign == 0 || DstAlign >= 16) &&
-                        (SrcAlign == 0 || SrcAlign >= 16)))) {
+    if (Op.Size >= 16 && (!Subtarget.isUnalignedMem16Slow() ||
+                          ((Op.DstAlign == 0 || Op.DstAlign >= 16) &&
+                           (Op.SrcAlign == 0 || Op.SrcAlign >= 16)))) {
       // FIXME: Check if unaligned 64-byte accesses are slow.
-      if (Size >= 64 && Subtarget.hasAVX512() &&
+      if (Op.Size >= 64 && Subtarget.hasAVX512() &&
           (Subtarget.getPreferVectorWidth() >= 512)) {
         return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
       }
       // FIXME: Check if unaligned 32-byte accesses are slow.
-      if (Size >= 32 && Subtarget.hasAVX() &&
+      if (Op.Size >= 32 && Subtarget.hasAVX() &&
           (Subtarget.getPreferVectorWidth() >= 256)) {
         // Although this isn't a well-supported type for AVX1, we'll let
         // legalization and shuffle lowering produce the optimal codegen. If we

@@ -2288,8 +2277,8 @@ EVT X86TargetLowering::getOptimalMemOpType(
     if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
         (Subtarget.getPreferVectorWidth() >= 128))
       return MVT::v4f32;
-  } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
-             !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
+  } else if ((!Op.IsMemset || Op.ZeroMemset) && !Op.MemcpyStrSrc &&
+             Op.Size >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
     // Do not use f64 to lower memcpy if source is string constant. It's
     // better to use i32 to avoid the loads.
     // Also, do not use f64 to lower memset unless this is a memset of zeros.

@@ -2302,7 +2291,7 @@ EVT X86TargetLowering::getOptimalMemOpType(
   // This is a compromise. If we reach here, unaligned accesses may be slow on
   // this target. However, creating smaller, aligned accesses could be even
   // slower and would certainly be a lot more code.
-  if (Subtarget.is64Bit() && Size >= 8)
+  if (Subtarget.is64Bit() && Op.Size >= 8)
     return MVT::i64;
   return MVT::i32;
 }


@@ -758,19 +758,7 @@ namespace llvm {
     unsigned getByValTypeAlignment(Type *Ty,
                                    const DataLayout &DL) const override;

-    /// Returns the target specific optimal type for load
-    /// and store operations as a result of memset, memcpy, and memmove
-    /// lowering. If DstAlign is zero that means it's safe to destination
-    /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
-    /// means there isn't a need to check it against alignment requirement,
-    /// probably because the source does not need to be loaded. If 'IsMemset' is
-    /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
-    /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
-    /// source is constant so it does not need to be loaded.
-    /// It returns EVT::Other if the type should be determined using generic
-    /// target-independent logic.
-    EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
-                            bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+    EVT getOptimalMemOpType(const MemOp &Op,
                             const AttributeList &FuncAttributes) const override;

     /// Returns true if it's safe to use load / store of the