[NFCI] Move cost estimation from TargetLowering to TargetTransformInfo.

TragetLowering had two last InstructionCost related `getTypeLegalizationCost()` and `getScalingFactorCost()` members, but all other costs are processed in TTI. E.g. it is not comfortable to use other TTI members in these two functions overrided in a target. Minor refactoring: `getTypeLegalizationCost()` now doesn't need DataLayout parameter - it was always passed from TTI. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D117723
2022-08-18 00:38:34 +03:00 · 2022-08-18 00:38:34 +03:00 · 7ed3d81333
parent 56a34451e1
commit 7ed3d81333
23 changed files with 273 additions and 292 deletions
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@ -368,7 +368,9 @@ public:
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
-    return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
+    if (getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace))
+      return 0;
+    return -1;
  }

  bool isTruncateFree(Type *Ty1, Type *Ty2) {
@ -784,6 +786,41 @@ public:
    return Cost;
  }

+  /// Estimate the cost of type-legalization and the legalized type.
+  std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const {
+    LLVMContext &C = Ty->getContext();
+    EVT MTy = getTLI()->getValueType(DL, Ty);
+
+    InstructionCost Cost = 1;
+    // We keep legalizing the type until we find a legal kind. We assume that
+    // the only operation that costs anything is the split. After splitting
+    // we need to handle two types.
+    while (true) {
+      TargetLoweringBase::LegalizeKind LK = getTLI()->getTypeConversion(C, MTy);
+
+      if (LK.first == TargetLoweringBase::TypeScalarizeScalableVector) {
+        // Ensure we return a sensible simple VT here, since many callers of
+        // this function require it.
+        MVT VT = MTy.isSimple() ? MTy.getSimpleVT() : MVT::i64;
+        return std::make_pair(InstructionCost::getInvalid(), VT);
+      }
+
+      if (LK.first == TargetLoweringBase::TypeLegal)
+        return std::make_pair(Cost, MTy.getSimpleVT());
+
+      if (LK.first == TargetLoweringBase::TypeSplitVector ||
+          LK.first == TargetLoweringBase::TypeExpandInteger)
+        Cost *= 2;
+
+      // Do not loop with f128 type.
+      if (MTy == LK.second)
+        return std::make_pair(Cost, MTy.getSimpleVT());
+
+      // Keep legalizing the type.
+      MTy = LK.second;
+    }
+  }
+
  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

  InstructionCost getArithmeticInstrCost(
@ -806,7 +843,7 @@ public:
                                           Opd1PropInfo, Opd2PropInfo,
                                           Args, CxtI);

-    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

    bool IsFloat = Ty->isFPOrFPVectorTy();
    // Assume that floating point arithmetic operations cost twice as much as
@ -940,10 +977,8 @@ public:
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");
-    std::pair<InstructionCost, MVT> SrcLT =
-        TLI->getTypeLegalizationCost(DL, Src);
-    std::pair<InstructionCost, MVT> DstLT =
-        TLI->getTypeLegalizationCost(DL, Dst);
+    std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(Src);
+    std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(Dst);

    TypeSize SrcSize = SrcLT.second.getSizeInBits();
    TypeSize DstSize = DstLT.second.getSizeInBits();
@ -1038,7 +1073,7 @@ public:
      // If we are legalizing by splitting, query the concrete TTI for the cost
      // of casting the original vector twice. We also need to factor in the
      // cost of the split itself. Count that as 1, to be consistent with
-      // TLI->getTypeLegalizationCost().
+      // getTypeLegalizationCost().
      bool SplitSrc =
          TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
          TargetLowering::TypeSplitVector;
@ -1119,8 +1154,7 @@ public:
      if (CondTy->isVectorTy())
        ISD = ISD::VSELECT;
    }
-    std::pair<InstructionCost, MVT> LT =
-        TLI->getTypeLegalizationCost(DL, ValTy);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);

    if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
        !TLI->isOperationExpand(ISD, LT.second)) {
@ -1153,10 +1187,7 @@ public:

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index) {
-    std::pair<InstructionCost, MVT> LT =
-        getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
-
-    return LT.first;
+    return getRegUsageForType(Val->getScalarType());
  }

  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
@ -1205,8 +1236,7 @@ public:
    // Assume types, such as structs, are expensive.
    if (getTLI()->getValueType(DL, Src,  true) == MVT::Other)
      return 4;
-    std::pair<InstructionCost, MVT> LT =
-        getTLI()->getTypeLegalizationCost(DL, Src);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);

    // Assuming that all loads of legal types cost 1.
    InstructionCost Cost = LT.first;
@ -1286,7 +1316,7 @@ public:

    // Legalize the vector type, and get the legalized and unlegalized type
    // sizes.
-    MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
+    MVT VecTyLT = getTypeLegalizationCost(VecTy).second;
    unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy);
    unsigned VecTyLTSize = VecTyLT.getStoreSize();

@ -1583,9 +1613,7 @@ public:
      // If we're not expanding the intrinsic then we assume this is cheap
      // to implement.
      if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgType)) {
-        std::pair<InstructionCost, MVT> LT =
-            getTLI()->getTypeLegalizationCost(DL, RetTy);
-        return LT.first;
+        return getTypeLegalizationCost(RetTy).first;
      }

      // Create the expanded types that will be used to calculate the uadd_sat
@ -2031,8 +2059,7 @@ public:
    }

    const TargetLoweringBase *TLI = getTLI();
-    std::pair<InstructionCost, MVT> LT =
-        TLI->getTypeLegalizationCost(DL, RetTy);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);

    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
      if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
@ -2128,8 +2155,7 @@ public:
  }

  unsigned getNumberOfParts(Type *Tp) {
-    std::pair<InstructionCost, MVT> LT =
-        getTLI()->getTypeLegalizationCost(DL, Tp);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
    return LT.first.isValid() ? *LT.first.getValue() : 0;
  }

@ -2187,8 +2213,7 @@ public:
    unsigned NumReduxLevels = Log2_32(NumVecElts);
    InstructionCost ArithCost = 0;
    InstructionCost ShuffleCost = 0;
-    std::pair<InstructionCost, MVT> LT =
-        thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
+    std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty);
    unsigned LongVectorCount = 0;
    unsigned MVTLen =
        LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
@ -2283,8 +2308,7 @@ public:
    }
    InstructionCost MinMaxCost = 0;
    InstructionCost ShuffleCost = 0;
-    std::pair<InstructionCost, MVT> LT =
-        thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
+    std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty);
    unsigned LongVectorCount = 0;
    unsigned MVTLen =
        LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@ -49,7 +49,6 @@
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/InstructionCost.h"
 #include "llvm/Support/MachineValueType.h"
 #include <algorithm>
 #include <cassert>
@ -964,6 +963,22 @@ public:
    return ValueTypeActions;
  }

+  /// Return pair that represents the legalization kind (first) that needs to
+  /// happen to EVT (second) in order to type-legalize it.
+  ///
+  /// First: how we should legalize values of this type, either it is already
+  /// legal (return 'Legal') or we need to promote it to a larger type (return
+  /// 'Promote'), or we need to expand it into multiple registers of smaller
+  /// integer type (return 'Expand').  'Custom' is not an option.
+  ///
+  /// Second: for types supported by the target, this is an identity function.
+  /// For types that must be promoted to larger types, this returns the larger
+  /// type to promote to.  For integer types that are larger than the largest
+  /// integer register, this contains one step in the expansion to get to the
+  /// smaller register. For illegal floating point types, this returns the
+  /// integer type to transform to.
+  LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const;
+
  /// Return how we should legalize values of this type, either it is already
  /// legal (return 'Legal') or we need to promote it to a larger type (return
  /// 'Promote'), or we need to expand it into multiple registers of smaller
@ -1905,10 +1920,6 @@ public:
  /// Get the ISD node that corresponds to the Instruction class opcode.
  int InstructionOpcodeToISD(unsigned Opcode) const;

-  /// Estimate the cost of type-legalization and the legalized type.
-  std::pair<InstructionCost, MVT> getTypeLegalizationCost(const DataLayout &DL,
-                                                          Type *Ty) const;
-
  /// @}

  //===--------------------------------------------------------------------===//
@ -2535,22 +2546,6 @@ public:
                                     Type *Ty, unsigned AddrSpace,
                                     Instruction *I = nullptr) const;

-  /// Return the cost of the scaling factor used in the addressing mode
-  /// represented by AM for this target, for a load/store of the specified type.
-  ///
-  /// If the AM is supported, the return value must be >= 0.
-  /// If the AM is not supported, it returns a negative value.
-  /// TODO: Handle pre/postinc as well.
-  /// TODO: Remove default argument
-  virtual InstructionCost getScalingFactorCost(const DataLayout &DL,
-                                               const AddrMode &AM, Type *Ty,
-                                               unsigned AS = 0) const {
-    // Default: assume that any scaling factor used in a legal AM is free.
-    if (isLegalAddressingMode(DL, AM, Ty, AS))
-      return 0;
-    return -1;
-  }
-
  /// Return true if the specified immediate is legal icmp immediate, that is
  /// the target has icmp instructions which can compare a register against the
  /// immediate without having to materialize the immediate into a register.
@ -3257,8 +3252,6 @@ private:
  ValueTypeActionImpl ValueTypeActions;

 private:
-  LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const;
-
  /// Targets can specify ISD nodes that they would like PerformDAGCombine
  /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this
  /// array.
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@ -1843,41 +1843,6 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
  llvm_unreachable("Unknown instruction type encountered!");
 }

-std::pair<InstructionCost, MVT>
-TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
-                                            Type *Ty) const {
-  LLVMContext &C = Ty->getContext();
-  EVT MTy = getValueType(DL, Ty);
-
-  InstructionCost Cost = 1;
-  // We keep legalizing the type until we find a legal kind. We assume that
-  // the only operation that costs anything is the split. After splitting
-  // we need to handle two types.
-  while (true) {
-    LegalizeKind LK = getTypeConversion(C, MTy);
-
-    if (LK.first == TypeScalarizeScalableVector) {
-      // Ensure we return a sensible simple VT here, since many callers of this
-      // function require it.
-      MVT VT = MTy.isSimple() ? MTy.getSimpleVT() : MVT::i64;
-      return std::make_pair(InstructionCost::getInvalid(), VT);
-    }
-
-    if (LK.first == TypeLegal)
-      return std::make_pair(Cost, MTy.getSimpleVT());
-
-    if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger)
-      Cost *= 2;
-
-    // Do not loop with f128 type.
-    if (MTy == LK.second)
-      return std::make_pair(Cost, MTy.getSimpleVT());
-
-    // Keep legalizing the type.
-    MTy = LK.second;
-  }
-}
-
 Value *
 TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB,
                                                       bool UseTLS) const {
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@ -13680,22 +13680,6 @@ bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const {
  return true;
 }

-InstructionCost AArch64TargetLowering::getScalingFactorCost(
-    const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const {
-  // Scaling factors are not free at all.
-  // Operands                     | Rt Latency
-  // -------------------------------------------
-  // Rt, [Xn, Xm]                 | 4
-  // -------------------------------------------
-  // Rt, [Xn, Xm, lsl #imm]       | Rn: 4 Rm: 5
-  // Rt, [Xn, Wm, <extend> #imm]  |
-  if (isLegalAddressingMode(DL, AM, Ty, AS))
-    // Scale represents reg2 * scale, thus account for 1 if
-    // it is not equal to 0 or 1.
-    return AM.Scale != 0 && AM.Scale != 1;
-  return -1;
-}
-
 bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
    const MachineFunction &MF, EVT VT) const {
  VT = VT.getScalarType();
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@ -634,14 +634,6 @@ public:
                             unsigned AS,
                             Instruction *I = nullptr) const override;

-  /// Return the cost of the scaling factor used in the addressing
-  /// mode represented by AM for this target, for a load/store
-  /// of the specified type.
-  /// If the AM is supported, the return value must be >= 0.
-  /// If the AM is not supported, it returns a negative value.
-  InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
-                                       Type *Ty, unsigned AS) const override;
-
  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@ -309,7 +309,7 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
  case Intrinsic::smax: {
    static const auto ValidMinMaxTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                        MVT::v8i16, MVT::v2i32, MVT::v4i32};
-    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+    auto LT = getTypeLegalizationCost(RetTy);
    // v2i64 types get converted to cmp+bif hence the cost of 2
    if (LT.second == MVT::v2i64)
      return LT.first * 2;
@ -324,7 +324,7 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
    static const auto ValidSatTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,
                                     MVT::v2i64};
-    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+    auto LT = getTypeLegalizationCost(RetTy);
    // This is a base cost of 1 for the vadd, plus 3 extract shifts if we
    // need to extend the type, as it uses shr(qadd(shl, shl)).
    unsigned Instrs =
@ -337,14 +337,14 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
    static const auto ValidAbsTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,
                                     MVT::v2i64};
-    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+    auto LT = getTypeLegalizationCost(RetTy);
    if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }))
      return LT.first;
    break;
  }
  case Intrinsic::experimental_stepvector: {
    InstructionCost Cost = 1; // Cost of the `index' instruction
-    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+    auto LT = getTypeLegalizationCost(RetTy);
    // Legalisation of illegal vectors involves an `index' instruction plus
    // (LT.first - 1) vector adds.
    if (LT.first > 1) {
@ -368,7 +368,7 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
        {Intrinsic::bitreverse, MVT::v1i64, 2},
        {Intrinsic::bitreverse, MVT::v2i64, 2},
    };
-    const auto LegalisationCost = TLI->getTypeLegalizationCost(DL, RetTy);
+    const auto LegalisationCost = getTypeLegalizationCost(RetTy);
    const auto *Entry =
        CostTableLookup(BitreverseTbl, ICA.getID(), LegalisationCost.second);
    if (Entry) {
@ -394,7 +394,7 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
        {ISD::CTPOP, MVT::v8i8,  1},
        {ISD::CTPOP, MVT::i32,   5},
    };
-    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+    auto LT = getTypeLegalizationCost(RetTy);
    MVT MTy = LT.second;
    if (const auto *Entry = CostTableLookup(CtpopCostTbl, ISD::CTPOP, MTy)) {
      // Extra cost of +1 when illegal vector types are legalized by promoting
@ -451,7 +451,7 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
    if (ICA.getArgTypes().empty())
      break;
    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
-    auto LT = TLI->getTypeLegalizationCost(DL, ICA.getArgTypes()[0]);
+    auto LT = getTypeLegalizationCost(ICA.getArgTypes()[0]);
    EVT MTy = TLI->getValueType(DL, RetTy);
    // Check for the legal types, which are where the size of the input and the
    // output are the same, or we are using cvt f64->i32 or f32->i64.
@ -1534,7 +1534,7 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,

  // Legalize the destination type and ensure it can be used in a widening
  // operation.
-  auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
+  auto DstTyL = getTypeLegalizationCost(DstTy);
  unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
  if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
    return false;
@ -1542,7 +1542,7 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
  // Legalize the source type and ensure it can be used in a widening
  // operation.
  auto *SrcTy = toVectorTy(Extend->getSrcTy());
-  auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
+  auto SrcTyL = getTypeLegalizationCost(SrcTy);
  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
  if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
    return false;
@ -1899,7 +1899,7 @@ InstructionCost AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode,
      getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);

  // Legalize the types.
-  auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
+  auto VecLT = getTypeLegalizationCost(VecTy);
  auto DstVT = TLI->getValueType(DL, Dst);
  auto SrcVT = TLI->getValueType(DL, Src);
  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
@ -1954,7 +1954,7 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,

  if (Index != -1U) {
    // Legalize the type.
-    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Val);

    // This type is legalized to a scalar type.
    if (!LT.second.isVector())
@ -1989,7 +1989,7 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
                                         Opd2PropInfo, Args, CxtI);

  // Legalize the type.
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  switch (ISD) {
@ -2150,7 +2150,7 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
          MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
      static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};

-      auto LT = TLI->getTypeLegalizationCost(DL, ValTy);
+      auto LT = getTypeLegalizationCost(ValTy);
      if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }) ||
          (ST->hasFullFP16() &&
           any_of(ValidFP16MinMaxTys, [&LT](MVT M) { return M == LT.second; })))
@ -2210,7 +2210,7 @@ AArch64TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
  if (useNeonVector(Src))
    return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                        CostKind);
-  auto LT = TLI->getTypeLegalizationCost(DL, Src);
+  auto LT = getTypeLegalizationCost(Src);
  if (!LT.first.isValid())
    return InstructionCost::getInvalid();

@ -2235,7 +2235,7 @@ InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
    return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                         Alignment, CostKind, I);
  auto *VT = cast<VectorType>(DataTy);
-  auto LT = TLI->getTypeLegalizationCost(DL, DataTy);
+  auto LT = getTypeLegalizationCost(DataTy);
  if (!LT.first.isValid())
    return InstructionCost::getInvalid();

@ -2272,7 +2272,7 @@ InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
    return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
                                  CostKind);

-  auto LT = TLI->getTypeLegalizationCost(DL, Ty);
+  auto LT = getTypeLegalizationCost(Ty);
  if (!LT.first.isValid())
    return InstructionCost::getInvalid();

@ -2617,7 +2617,7 @@ InstructionCost
 AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                       bool IsUnsigned,
                                       TTI::TargetCostKind CostKind) {
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

  if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
@ -2641,7 +2641,7 @@ AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,

 InstructionCost AArch64TTIImpl::getArithmeticReductionCostSVE(
    unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) {
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
  InstructionCost LegalizationCost = 0;
  if (LT.first > 1) {
    Type *LegalVTy = EVT(LT.second).getTypeForEVT(ValTy->getContext());
@ -2690,7 +2690,7 @@ AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
  if (isa<ScalableVectorType>(ValTy))
    return getArithmeticReductionCostSVE(Opcode, ValTy, CostKind);

-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
  MVT MTy = LT.second;
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");
@ -2782,7 +2782,7 @@ InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index) {
      { TTI::SK_Splice, MVT::nxv2f64,  1 },
  };

-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
  Type *LegalVTy = EVT(LT.second).getTypeForEVT(Tp->getContext());
  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
  EVT PromotedVT = LT.second.getScalarType() == MVT::i1
@ -2819,7 +2819,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                               ArrayRef<int> Mask, int Index,
                                               VectorType *SubTp,
                                               ArrayRef<const Value *> Args) {
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
  // If we have a Mask, and the LT is being legalized somehow, split the Mask
  // into smaller vectors and sum the cost of each shuffle.
  if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
@ -3016,8 +3016,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
  // move, so long as the inserted vector is "aligned".
  if (Kind == TTI::SK_InsertSubvector && LT.second.isFixedLengthVector() &&
      LT.second.getSizeInBits() <= 128 && SubTp) {
-    std::pair<InstructionCost, MVT> SubLT =
-        TLI->getTypeLegalizationCost(DL, SubTp);
+    std::pair<InstructionCost, MVT> SubLT = getTypeLegalizationCost(SubTp);
    if (SubLT.second.isVector()) {
      int NumElts = LT.second.getVectorNumElements();
      int NumSubElts = SubLT.second.getVectorNumElements();
@ -3052,3 +3051,26 @@ bool AArch64TTIImpl::preferPredicateOverEpilogue(

  return (TailFoldingKindLoc & Required) == Required;
 }
+
+InstructionCost
+AArch64TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
+                                     int64_t BaseOffset, bool HasBaseReg,
+                                     int64_t Scale, unsigned AddrSpace) const {
+  // Scaling factors are not free at all.
+  // Operands                     | Rt Latency
+  // -------------------------------------------
+  // Rt, [Xn, Xm]                 | 4
+  // -------------------------------------------
+  // Rt, [Xn, Xm, lsl #imm]       | Rn: 4 Rm: 5
+  // Rt, [Xn, Wm, <extend> #imm]  |
+  TargetLoweringBase::AddrMode AM;
+  AM.BaseGV = BaseGV;
+  AM.BaseOffs = BaseOffset;
+  AM.HasBaseReg = HasBaseReg;
+  AM.Scale = Scale;
+  if (getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace))
+    // Scale represents reg2 * scale, thus account for 1 if
+    // it is not equal to 0 or 1.
+    return AM.Scale != 0 && AM.Scale != 1;
+  return -1;
+}
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@ -371,6 +371,15 @@ public:
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = None);
+
+  /// Return the cost of the scaling factor used in the addressing
+  /// mode represented by AM for this target, for a load/store
+  /// of the specified type.
+  /// If the AM is supported, the return value must be >= 0.
+  /// If the AM is not supported, it returns a negative value.
+  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
+                                       int64_t BaseOffset, bool HasBaseReg,
+                                       int64_t Scale, unsigned AddrSpace) const;
  /// @}
 };

--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@ -518,7 +518,7 @@ InstructionCost GCNTTIImpl::getArithmeticInstrCost(
    const Instruction *CxtI) {

  // Legalize the type.
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  // Because we don't have any legal vector operations, but the legal types, we
@ -690,7 +690,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
  Type *RetTy = ICA.getReturnType();

  // Legalize the type.
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);

  unsigned NElts = LT.second.isVector() ?
    LT.second.getVectorNumElements() : 1;
@ -769,7 +769,7 @@ GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
  if (!ST->hasVOP3PInsts() || OrigTy.getScalarSizeInBits() != 16)
    return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);

-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
  return LT.first * getFullRateInstrCost();
 }

@ -784,7 +784,7 @@ GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
  if (!ST->hasVOP3PInsts() || OrigTy.getScalarSizeInBits() != 16)
    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);

-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
  return LT.first * getHalfRateInstrCost(CostKind);
 }

@ -1148,3 +1148,17 @@ int GCNTTIImpl::get64BitInstrCost(TTI::TargetCostKind CostKind) const {
             : ST->hasHalfRate64Ops() ? getHalfRateInstrCost(CostKind)
                                      : getQuarterRateInstrCost(CostKind);
 }
+
+std::pair<InstructionCost, MVT>
+GCNTTIImpl::getTypeLegalizationCost(Type *Ty) const {
+  std::pair<InstructionCost, MVT> Cost = BaseT::getTypeLegalizationCost(Ty);
+  auto Size = DL.getTypeSizeInBits(Ty);
+  // Maximum load or store can handle 8 dwords for scalar and 4 for
+  // vector ALU. Let's assume anything above 8 dwords is expensive
+  // even if legal.
+  if (Size <= 256)
+    return Cost;
+
+  Cost.first += (Size + 255) / 256;
+  return Cost;
+}
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@ -94,6 +94,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  // quarter. This also applies to some integer operations.
  int get64BitInstrCost(TTI::TargetCostKind CostKind) const;

+  std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const;
+
 public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@ -12947,22 +12947,6 @@ bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
  return hasCFUser(V, Visited, Subtarget->getWavefrontSize());
 }

-std::pair<InstructionCost, MVT>
-SITargetLowering::getTypeLegalizationCost(const DataLayout &DL,
-                                          Type *Ty) const {
-  std::pair<InstructionCost, MVT> Cost =
-      TargetLoweringBase::getTypeLegalizationCost(DL, Ty);
-  auto Size = DL.getTypeSizeInBits(Ty);
-  // Maximum load or store can handle 8 dwords for scalar and 4 for
-  // vector ALU. Let's assume anything above 8 dwords is expensive
-  // even if legal.
-  if (Size <= 256)
-    return Cost;
-
-  Cost.first += (Size + 255) / 256;
-  return Cost;
-}
-
 bool SITargetLowering::hasMemSDNodeUser(SDNode *N) const {
  SDNode::use_iterator I = N->use_begin(), E = N->use_end();
  for (; I != E; ++I) {
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@ -525,9 +525,6 @@ public:
                                      const SIRegisterInfo &TRI,
                                      SIMachineFunctionInfo &Info) const;

-  std::pair<InstructionCost, MVT> getTypeLegalizationCost(const DataLayout &DL,
-                                                          Type *Ty) const;
-
  MachineMemOperand::Flags
  getTargetMMOFlags(const Instruction &I) const override;
 };
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@ -19147,18 +19147,6 @@ bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
  return true;
 }

-InstructionCost ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
-                                                        const AddrMode &AM,
-                                                        Type *Ty,
-                                                        unsigned AS) const {
-  if (isLegalAddressingMode(DL, AM, Ty, AS)) {
-    if (Subtarget->hasFPAO())
-      return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster
-    return 0;
-  }
-  return -1;
-}
-
 /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
 /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
 /// expanded to FMAs when this method returns true, otherwise fmuladd is
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@ -470,14 +470,6 @@ class VectorType;
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

-    /// getScalingFactorCost - Return the cost of the scaling used in
-    /// addressing mode represented by AM.
-    /// If the AM is supported, the return value must be >= 0.
-    /// If the AM is not supported, the return value must be negative.
-    InstructionCost getScalingFactorCost(const DataLayout &DL,
-                                         const AddrMode &AM, Type *Ty,
-                                         unsigned AS) const override;
-
    bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;

    /// Returns true if the addressing mode representing by AM is legal
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@ -634,7 +634,7 @@ InstructionCost ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
        {ISD::FP_EXTEND, MVT::v2f32, 2},
        {ISD::FP_EXTEND, MVT::v4f32, 4}};

-    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
    if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
      return AdjustCost(LT.first * Entry->Cost);
  }
@ -901,7 +901,7 @@ InstructionCost ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
    // sometimes just be vmovs. Integer involve being passes to GPR registers,
    // causing more of a delay.
    std::pair<InstructionCost, MVT> LT =
-        getTLI()->getTypeLegalizationCost(DL, ValTy->getScalarType());
+        getTypeLegalizationCost(ValTy->getScalarType());
    return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1);
  }

@ -926,7 +926,7 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
    // - may require one or more conditional mov (including an IT),
    // - can't operate directly on immediates,
    // - require live flags, which we can't copy around easily.
-    InstructionCost Cost = TLI->getTypeLegalizationCost(DL, ValTy).first;
+    InstructionCost Cost = getTypeLegalizationCost(ValTy).first;

    // Possible IT instruction for Thumb2, or more for Thumb1.
    ++Cost;
@ -1003,8 +1003,7 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
        return Entry->Cost;
    }

-    std::pair<InstructionCost, MVT> LT =
-        TLI->getTypeLegalizationCost(DL, ValTy);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
    return LT.first;
  }

@ -1028,8 +1027,7 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                    I);
    }

-    std::pair<InstructionCost, MVT> LT =
-        TLI->getTypeLegalizationCost(DL, ValTy);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
    int BaseCost = ST->getMVEVectorCostFactor(CostKind);
    // There are two types - the input that specifies the type of the compare
    // and the output vXi1 type. Because we don't know how the output will be
@ -1222,7 +1220,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
          {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
          {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};

-      std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+      std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
      if (const auto *Entry =
              CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE, LT.second))
        return LT.first * Entry->Cost;
@ -1243,7 +1241,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
          {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
          {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};

-      std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+      std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
      if (const auto *Entry =
              CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
        return LT.first * Entry->Cost;
@ -1267,7 +1265,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,

          {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};

-      std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+      std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
      if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
                                              ISD::VECTOR_SHUFFLE, LT.second))
        return LT.first * Entry->Cost;
@ -1283,7 +1281,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
          {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
          {ISD::VECTOR_SHUFFLE, MVT::v8f16, 1}};

-      std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+      std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
      if (const auto *Entry = CostTableLookup(MVEDupTbl, ISD::VECTOR_SHUFFLE,
                                              LT.second))
        return LT.first * Entry->Cost *
@ -1291,7 +1289,7 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
    }

    if (!Mask.empty()) {
-      std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+      std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
      if (LT.second.isVector() &&
          Mask.size() <= LT.second.getVectorNumElements() &&
          (isVREVMask(Mask, LT.second, 16) || isVREVMask(Mask, LT.second, 32) ||
@ -1328,7 +1326,7 @@ InstructionCost ARMTTIImpl::getArithmeticInstrCost(
    }
  }

-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

  if (ST->hasNEON()) {
    const unsigned FunctionCallDivCost = 20;
@ -1467,7 +1465,7 @@ InstructionCost ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
      cast<VectorType>(Src)->getElementType()->isDoubleTy()) {
    // Unaligned loads/stores are extremely inefficient.
    // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
-    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
    return LT.first * 4;
  }

@ -1568,7 +1566,7 @@ InstructionCost ARMTTIImpl::getGatherScatterOpCost(

  unsigned NumElems = VTy->getNumElements();
  unsigned EltSize = VTy->getScalarSizeInBits();
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, DataTy);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(DataTy);

  // For now, it is assumed that for the MVE gather instructions the loads are
  // all effectively serialised. This means the cost is the scalar cost
@ -1664,7 +1662,7 @@ ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
  if (!ST->hasMVEIntegerOps() || !ValVT.isSimple() || ISD != ISD::ADD)
    return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);

-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);

  static const CostTblEntry CostTblAdd[]{
      {ISD::ADD, MVT::v16i8, 1},
@ -1688,8 +1686,7 @@ InstructionCost ARMTTIImpl::getExtendedReductionCost(
  switch (ISD) {
  case ISD::ADD:
    if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
-      std::pair<InstructionCost, MVT> LT =
-          TLI->getTypeLegalizationCost(DL, ValTy);
+      std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);

      // The legal cases are:
      //   VADDV u/s 8/16/32
@ -1720,8 +1717,7 @@ ARMTTIImpl::getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
  EVT ResVT = TLI->getValueType(DL, ResTy);

  if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
-    std::pair<InstructionCost, MVT> LT =
-        TLI->getTypeLegalizationCost(DL, ValTy);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);

    // The legal cases are:
    //   VMLAV u/s 8/16/32
@ -1763,7 +1759,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
      break;
    Type *VT = ICA.getReturnType();

-    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VT);
    if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
        LT.second == MVT::v16i8) {
      // This is a base cost of 1 for the vqadd, plus 3 extract shifts if we
@ -1783,7 +1779,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
      break;
    Type *VT = ICA.getReturnType();

-    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VT);
    if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
        LT.second == MVT::v16i8)
      return LT.first * ST->getMVEVectorCostFactor(CostKind);
@ -1794,7 +1790,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
    if (!ST->hasMVEFloatOps())
      break;
    Type *VT = ICA.getReturnType();
-    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VT);
    if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16)
      return LT.first * ST->getMVEVectorCostFactor(CostKind);
    break;
@ -1804,7 +1800,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
    if (ICA.getArgTypes().empty())
      break;
    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
-    auto LT = TLI->getTypeLegalizationCost(DL, ICA.getArgTypes()[0]);
+    auto LT = getTypeLegalizationCost(ICA.getArgTypes()[0]);
    EVT MTy = TLI->getValueType(DL, ICA.getReturnType());
    // Check for the legal types, with the corect subtarget features.
    if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) ||
@ -2416,3 +2412,20 @@ bool ARMTTIImpl::preferPredicatedReductionSelect(
    return false;
  return true;
 }
+
+InstructionCost ARMTTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
+                                                 int64_t BaseOffset,
+                                                 bool HasBaseReg, int64_t Scale,
+                                                 unsigned AddrSpace) const {
+  TargetLoweringBase::AddrMode AM;
+  AM.BaseGV = BaseGV;
+  AM.BaseOffs = BaseOffset;
+  AM.HasBaseReg = HasBaseReg;
+  AM.Scale = Scale;
+  if (getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace)) {
+    if (ST->hasFPAO())
+      return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster
+    return 0;
+  }
+  return -1;
+}
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@ -287,6 +287,14 @@ public:
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

+  /// getScalingFactorCost - Return the cost of the scaling used in
+  /// addressing mode represented by AM.
+  /// If the AM is supported, the return value must be >= 0.
+  /// If the AM is not supported, the return value must be negative.
+  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
+                                       int64_t BaseOffset, bool HasBaseReg,
+                                       int64_t Scale, unsigned AddrSpace) const;
+
  bool maybeLoweredToCall(Instruction &I);
  bool isLoweredToCall(const Function *F);
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@ -145,7 +145,7 @@ HexagonTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                      TTI::TargetCostKind CostKind) {
  if (ICA.getID() == Intrinsic::bswap) {
    std::pair<InstructionCost, MVT> LT =
-        TLI.getTypeLegalizationCost(DL, ICA.getReturnType());
+        getTypeLegalizationCost(ICA.getReturnType());
    return LT.first + 2;
  }
  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
@ -254,7 +254,7 @@ InstructionCost HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                                   TTI::TargetCostKind CostKind,
                                                   const Instruction *I) {
  if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) {
-    std::pair<InstructionCost, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
    if (Opcode == Instruction::FCmp)
      return LT.first + FloatFactor * getTypeNumElements(ValTy);
  }
@ -274,7 +274,7 @@ InstructionCost HexagonTTIImpl::getArithmeticInstrCost(
                                         Opd2PropInfo, Args, CxtI);

  if (Ty->isVectorTy()) {
-    std::pair<InstructionCost, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
    if (LT.second.isFloatingPoint())
      return LT.first + FloatFactor * getTypeNumElements(Ty);
  }
@ -291,10 +291,8 @@ InstructionCost HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
    unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
    unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;

-    std::pair<InstructionCost, MVT> SrcLT =
-        TLI.getTypeLegalizationCost(DL, SrcTy);
-    std::pair<InstructionCost, MVT> DstLT =
-        TLI.getTypeLegalizationCost(DL, DstTy);
+    std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(SrcTy);
+    std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(DstTy);
    InstructionCost Cost =
        std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
    // TODO: Allow non-throughput costs that aren't binary.
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@ -428,7 +428,7 @@ InstructionCost NVPTXTTIImpl::getArithmeticInstrCost(
    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
    const Instruction *CxtI) {
  // Legalize the type.
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@ -331,8 +331,7 @@ InstructionCost PPCTTIImpl::getUserCost(const User *U,

  if (U->getType()->isVectorTy()) {
    // Instructions that need to be split should cost more.
-    std::pair<InstructionCost, MVT> LT =
-        TLI->getTypeLegalizationCost(DL, U->getType());
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(U->getType());
    return LT.first * BaseT::getUserCost(U, Operands, CostKind);
  }

@ -960,7 +959,7 @@ InstructionCost PPCTTIImpl::vectorCostAdjustmentFactor(unsigned Opcode,
  if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
    return InstructionCost(1);

-  std::pair<InstructionCost, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1);
+  std::pair<InstructionCost, MVT> LT1 = getTypeLegalizationCost(Ty1);
  // If type legalization involves splitting the vector, we don't want to
  // double the cost at every step - only the last step.
  if (LT1.first != 1 || !LT1.second.isVector())
@ -971,7 +970,7 @@ InstructionCost PPCTTIImpl::vectorCostAdjustmentFactor(unsigned Opcode,
    return InstructionCost(1);

  if (Ty2) {
-    std::pair<InstructionCost, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2);
+    std::pair<InstructionCost, MVT> LT2 = getTypeLegalizationCost(Ty2);
    if (LT2.first != 1 || !LT2.second.isVector())
      return InstructionCost(1);
  }
@ -1014,7 +1013,7 @@ InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
    return InstructionCost::getMax();

  // Legalize the type.
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);

  // PPC, for both Altivec/VSX, support cheap arbitrary permutations
  // (at least in the sense that there need only be one non-loop-invariant
@ -1156,7 +1155,7 @@ InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind);
  // Legalize the type.
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Invalid Opcode");

@ -1246,7 +1245,7 @@ InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost(
         "Expect a vector type for interleaved memory op");

  // Legalize the type.
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VecTy);

  // Firstly, the cost of load/store operation.
  InstructionCost Cost = getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment),
@ -1427,8 +1426,7 @@ InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
  assert(SrcVTy && "Expected a vector type for VP memory operations");

  if (hasActiveVectorLength(Opcode, Src, Alignment)) {
-    std::pair<InstructionCost, MVT> LT =
-        TLI->getTypeLegalizationCost(DL, SrcVTy);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(SrcVTy);

    InstructionCost CostFactor =
        vectorCostAdjustmentFactor(Opcode, Src, nullptr);
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@ -168,7 +168,7 @@ RISCVTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
 }

 InstructionCost RISCVTTIImpl::getSpliceCost(VectorType *Tp, int Index) {
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);

  unsigned Cost = 2; // vslidedown+vslideup.
  // TODO: LMUL should increase cost.
@ -182,7 +182,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                             int Index, VectorType *SubTp,
                                             ArrayRef<const Value *> Args) {
  if (isa<ScalableVectorType>(Tp)) {
-    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
    switch (Kind) {
    default:
      // Fallthrough to generic handling.
@ -257,7 +257,7 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
  // TODO: add more intrinsic
  case Intrinsic::experimental_stepvector: {
    unsigned Cost = 1; // vid
-    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+    auto LT = getTypeLegalizationCost(RetTy);
    return Cost + (LT.first - 1);
  }
  default:
@ -364,7 +364,7 @@ RISCVTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
  if (Ty->getScalarSizeInBits() > ST->getELEN())
    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);

-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
  if (Ty->getElementType()->isIntegerTy(1))
    // vcpop sequences, see vreduction-mask.ll.  umax, smin actually only
    // cost 2, but we don't have enough info here so we slightly over cost.
@ -394,7 +394,7 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
      ISD != ISD::FADD)
    return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);

-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
  if (Ty->getElementType()->isIntegerTy(1))
    // vcpop sequences, see vreduction-mask.ll
    return (LT.first - 1) + (ISD == ISD::AND ? 3 : 2);
@ -423,7 +423,7 @@ InstructionCost RISCVTTIImpl::getExtendedReductionCost(
    return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
                                           FMF, CostKind);

-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);

  if (ResTy->getScalarSizeInBits() != 2 * LT.second.getScalarSizeInBits())
    return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -56478,35 +56478,6 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
  return Res;
 }

-InstructionCost X86TargetLowering::getScalingFactorCost(const DataLayout &DL,
-                                                        const AddrMode &AM,
-                                                        Type *Ty,
-                                                        unsigned AS) const {
-  // Scaling factors are not free at all.
-  // An indexed folded instruction, i.e., inst (reg1, reg2, scale),
-  // will take 2 allocations in the out of order engine instead of 1
-  // for plain addressing mode, i.e. inst (reg1).
-  // E.g.,
-  // vaddps (%rsi,%rdx), %ymm0, %ymm1
-  // Requires two allocations (one for the load, one for the computation)
-  // whereas:
-  // vaddps (%rsi), %ymm0, %ymm1
-  // Requires just 1 allocation, i.e., freeing allocations for other operations
-  // and having less micro operations to execute.
-  //
-  // For some X86 architectures, this is even worse because for instance for
-  // stores, the complex addressing mode forces the instruction to use the
-  // "load" ports instead of the dedicated "store" port.
-  // E.g., on Haswell:
-  // vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.
-  // vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
-  if (isLegalAddressingMode(DL, AM, Ty, AS))
-    // Scale represents reg2 * scale, thus account for 1
-    // as soon as we use a second register.
-    return AM.Scale != 0;
-  return -1;
-}
-
 bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
  // Integer division on x86 is expensive. However, when aggressively optimizing
  // for code size, we prefer to use a div instruction, as it is usually smaller
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@ -1240,15 +1240,6 @@ namespace llvm {

    bool isLegalStoreImmediate(int64_t Imm) const override;

-    /// Return the cost of the scaling factor used in the addressing
-    /// mode represented by AM for this target, for a load/store
-    /// of the specified type.
-    /// If the AM is supported, the return value must be >= 0.
-    /// If the AM is not supported, it returns a negative value.
-    InstructionCost getScalingFactorCost(const DataLayout &DL,
-                                         const AddrMode &AM, Type *Ty,
-                                         unsigned AS) const override;
-
    /// This is used to enable splatted operand transforms for vector shifts
    /// and vector funnel shifts.
    bool isVectorShiftByScalarCheap(Type *Ty) const override;
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@ -202,7 +202,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
  }

  // Legalize the type.
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");
@ -1089,7 +1089,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                           ArrayRef<const Value *> Args) {
  // 64-bit packed float vectors (v2f32) are widened to type v4f32.
  // 64-bit packed integer vectors (v2i32) are widened to type v4i32.
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, BaseTp);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(BaseTp);

  Kind = improveShuffleKindFromMask(Kind, Mask);
  // Treat Transpose as 2-op shuffles - there's no difference in lowering.
@ -1108,8 +1108,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
    int NumElts = LT.second.getVectorNumElements();
    if ((Index % NumElts) == 0)
      return 0;
-    std::pair<InstructionCost, MVT> SubLT =
-        TLI->getTypeLegalizationCost(DL, SubTp);
+    std::pair<InstructionCost, MVT> SubLT = getTypeLegalizationCost(SubTp);
    if (SubLT.second.isVector()) {
      int NumSubElts = SubLT.second.getVectorNumElements();
      if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
@ -1155,8 +1154,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
  // isn't free, because we need to preserve the rest of the wide vector.
  if (Kind == TTI::SK_InsertSubvector && LT.second.isVector()) {
    int NumElts = LT.second.getVectorNumElements();
-    std::pair<InstructionCost, MVT> SubLT =
-        TLI->getTypeLegalizationCost(DL, SubTp);
+    std::pair<InstructionCost, MVT> SubLT = getTypeLegalizationCost(SubTp);
    if (SubLT.second.isVector()) {
      int NumSubElts = SubLT.second.getVectorNumElements();
      if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
@ -2528,9 +2526,8 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
  }

  // Fall back to legalized types.
-  std::pair<InstructionCost, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
-  std::pair<InstructionCost, MVT> LTDest =
-      TLI->getTypeLegalizationCost(DL, Dst);
+  std::pair<InstructionCost, MVT> LTSrc = getTypeLegalizationCost(Src);
+  std::pair<InstructionCost, MVT> LTDest = getTypeLegalizationCost(Dst);

  // If we're truncating to the same legalized type - just assume its free.
  if (ISD == ISD::TRUNCATE && LTSrc.second == LTDest.second)
@ -2630,7 +2627,7 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                     I);

  // Legalize the type.
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);

  MVT MTy = LT.second;

@ -3395,7 +3392,7 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,

  if (ISD != ISD::DELETED_NODE) {
    // Legalize the type.
-    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, OpTy);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(OpTy);
    MVT MTy = LT.second;

    // Attempt to lookup cost.
@ -3629,8 +3626,7 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,

  if (ISD != ISD::DELETED_NODE) {
    // Legalize the type.
-    std::pair<InstructionCost, MVT> LT =
-        TLI->getTypeLegalizationCost(DL, RetTy);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
    MVT MTy = LT.second;

    // Attempt to lookup cost.
@ -3709,7 +3705,7 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
      return 1;

    // Legalize the type.
-    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
+    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Val);

    // This type is legalized to a scalar type.
    if (!LT.second.isVector())
@ -3797,7 +3793,7 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
             cast<FixedVectorType>(Ty)->getNumElements() &&
         "Vector size mismatch");

-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
  MVT MScalarTy = LT.second.getScalarType();
  unsigned SizeInBits = LT.second.getSizeInBits();

@ -3987,10 +3983,10 @@ X86TTIImpl::getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
  auto *DstVecTy = FixedVectorType::get(EltTy, NumDstElements);

  // Legalize the types.
-  MVT LegalSrcVecTy = TLI->getTypeLegalizationCost(DL, SrcVecTy).second;
-  MVT LegalPromSrcVecTy = TLI->getTypeLegalizationCost(DL, PromSrcVecTy).second;
-  MVT LegalPromDstVecTy = TLI->getTypeLegalizationCost(DL, PromDstVecTy).second;
-  MVT LegalDstVecTy = TLI->getTypeLegalizationCost(DL, DstVecTy).second;
+  MVT LegalSrcVecTy = getTypeLegalizationCost(SrcVecTy).second;
+  MVT LegalPromSrcVecTy = getTypeLegalizationCost(PromSrcVecTy).second;
+  MVT LegalPromDstVecTy = getTypeLegalizationCost(PromDstVecTy).second;
+  MVT LegalDstVecTy = getTypeLegalizationCost(DstVecTy).second;
  // They should have legalized into vector types.
  if (!LegalSrcVecTy.isVector() || !LegalPromSrcVecTy.isVector() ||
      !LegalPromDstVecTy.isVector() || !LegalDstVecTy.isVector())
@ -4064,7 +4060,7 @@ InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                  CostKind);

  // Legalize the type.
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);

  auto *VTy = dyn_cast<FixedVectorType>(Src);

@ -4227,7 +4223,7 @@ X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment,
  }

  // Legalize the type.
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(SrcVTy);
  auto VT = TLI->getValueType(DL, SrcVTy);
  InstructionCost Cost = 0;
  if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
@ -4343,7 +4339,7 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
        return Entry->Cost;
  }

-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);

  MVT MTy = LT.second;

@ -4531,7 +4527,7 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,

 InstructionCost X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy,
                                          bool IsUnsigned) {
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

  MVT MTy = LT.second;

@ -4661,7 +4657,7 @@ InstructionCost
 X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
                                   bool IsUnsigned,
                                   TTI::TargetCostKind CostKind) {
-  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);

  MVT MTy = LT.second;

@ -5088,10 +5084,8 @@ InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy,

  auto *IndexVTy = FixedVectorType::get(
      IntegerType::get(SrcVTy->getContext(), IndexSize), VF);
-  std::pair<InstructionCost, MVT> IdxsLT =
-      TLI->getTypeLegalizationCost(DL, IndexVTy);
-  std::pair<InstructionCost, MVT> SrcLT =
-      TLI->getTypeLegalizationCost(DL, SrcVTy);
+  std::pair<InstructionCost, MVT> IdxsLT = getTypeLegalizationCost(IndexVTy);
+  std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(SrcVTy);
  InstructionCost::CostType SplitFactor =
      *std::max(IdxsLT.first, SrcLT.first).getValue();
  if (SplitFactor > 1) {
@ -5533,7 +5527,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(

  // Calculate the number of memory operations (NumOfMemOps), required
  // for load/store the VecTy.
-  MVT LegalVT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
+  MVT LegalVT = getTypeLegalizationCost(VecTy).second;
  unsigned VecTySize = DL.getTypeStoreSize(VecTy);
  unsigned LegalVTSize = LegalVT.getStoreSize();
  unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
@ -5613,8 +5607,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
    auto *ResultTy = FixedVectorType::get(VecTy->getElementType(),
                                          VecTy->getNumElements() / Factor);
    InstructionCost NumOfResults =
-        getTLI()->getTypeLegalizationCost(DL, ResultTy).first *
-        NumOfLoadsInInterleaveGrp;
+        getTypeLegalizationCost(ResultTy).first * NumOfLoadsInInterleaveGrp;

    // About a half of the loads may be folded in shuffles when we have only
    // one result. If we have more than one result, or the loads are masked,
@ -5711,7 +5704,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
  // VecTy for interleave memop is <VF*Factor x Elt>.
  // So, for VF=4, Interleave Factor = 3, Element type = i32 we have
  // VecTy = <12 x i32>.
-  MVT LegalVT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
+  MVT LegalVT = getTypeLegalizationCost(VecTy).second;

  // This function can be called with VecTy=<6xi128>, Factor=3, in which case
  // the VF=2, while v2i128 is an unsupported MVT vector type
@ -5989,3 +5982,37 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
                                           Alignment, AddressSpace, CostKind,
                                           UseMaskForCond, UseMaskForGaps);
 }
+
+InstructionCost X86TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
+                                                 int64_t BaseOffset,
+                                                 bool HasBaseReg, int64_t Scale,
+                                                 unsigned AddrSpace) const {
+  // Scaling factors are not free at all.
+  // An indexed folded instruction, i.e., inst (reg1, reg2, scale),
+  // will take 2 allocations in the out of order engine instead of 1
+  // for plain addressing mode, i.e. inst (reg1).
+  // E.g.,
+  // vaddps (%rsi,%rdx), %ymm0, %ymm1
+  // Requires two allocations (one for the load, one for the computation)
+  // whereas:
+  // vaddps (%rsi), %ymm0, %ymm1
+  // Requires just 1 allocation, i.e., freeing allocations for other operations
+  // and having less micro operations to execute.
+  //
+  // For some X86 architectures, this is even worse because for instance for
+  // stores, the complex addressing mode forces the instruction to use the
+  // "load" ports instead of the dedicated "store" port.
+  // E.g., on Haswell:
+  // vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.
+  // vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
+  TargetLoweringBase::AddrMode AM;
+  AM.BaseGV = BaseGV;
+  AM.BaseOffs = BaseOffset;
+  AM.HasBaseReg = HasBaseReg;
+  AM.Scale = Scale;
+  if (getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace))
+    // Scale represents reg2 * scale, thus account for 1
+    // as soon as we use a second register.
+    return AM.Scale != 0;
+  return -1;
+}
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@ -226,6 +226,15 @@ public:
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind);
+  /// Return the cost of the scaling factor used in the addressing
+  /// mode represented by AM for this target, for a load/store
+  /// of the specified type.
+  /// If the AM is supported, the return value must be >= 0.
+  /// If the AM is not supported, it returns a negative value.
+  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
+                                       int64_t BaseOffset, bool HasBaseReg,
+                                       int64_t Scale, unsigned AddrSpace) const;
+
  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2);
  bool canMacroFuseCmp();