[ARM] Teach the Arm cost model that a Shift can be folded into other instructions

This attempts to teach the cost model in Arm that code such as: %s = shl i32 %a, 3 %a = and i32 %s, %b Can under Arm or Thumb2 become: and r0, r1, r2, lsl #3 So the cost of the shift can essentially be free. To do this without trying to artificially adjust the cost of the "and" instruction, it needs to get the users of the shl and check if they are a type of instruction that the shift can be folded into. And so it needs to have access to the actual instruction in getArithmeticInstrCost, which if available is added as an extra parameter much like getCastInstrCost. We otherwise limit it to shifts with a single user, which should hopefully handle most of the cases. The list of instruction that the shift can be folded into include ADC, ADD, AND, BIC, CMP, EOR, MVN, ORR, ORN, RSB, SBC and SUB. This translates to Add, Sub, And, Or, Xor and ICmp. Differential Revision: https://reviews.llvm.org/D70966
2019-12-08 15:33:24 +00:00 · 2019-12-08 15:33:24 +00:00 · be7a107070
parent f008b5b8ce
commit be7a107070
28 changed files with 157 additions and 102 deletions
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@ -901,12 +901,15 @@ public:
  /// \p Args is an optional argument which holds the instruction operands
  /// values so the TTI can analyze those values searching for special
  /// cases or optimizations based on those values.
+  /// \p CxtI is the optional original context instruction, if one exists, to
+  /// provide even more information.
  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
      OperandValueKind Opd2Info = OK_AnyValue,
      OperandValueProperties Opd1PropInfo = OP_None,
      OperandValueProperties Opd2PropInfo = OP_None,
-      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+      const Instruction *CxtI = nullptr) const;

  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
  /// The index and subtype parameters are used by the subvector insertion and
@ -1309,12 +1312,11 @@ public:
  virtual unsigned getMaxPrefetchIterationsAhead() const = 0;

  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
-  virtual unsigned
-  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
-                         OperandValueKind Opd2Info,
-                         OperandValueProperties Opd1PropInfo,
-                         OperandValueProperties Opd2PropInfo,
-                         ArrayRef<const Value *> Args) = 0;
+  virtual unsigned getArithmeticInstrCost(
+      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
+      OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
+      OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
+      const Instruction *CxtI = nullptr) = 0;
  virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                             Type *SubTp) = 0;
  virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
@ -1709,14 +1711,15 @@ public:
                                            BlockFrequencyInfo *BFI) override {
    return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
  }
-  unsigned
-  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
-                         OperandValueKind Opd2Info,
-                         OperandValueProperties Opd1PropInfo,
-                         OperandValueProperties Opd2PropInfo,
-                         ArrayRef<const Value *> Args) override {
+  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                  OperandValueKind Opd1Info,
+                                  OperandValueKind Opd2Info,
+                                  OperandValueProperties Opd1PropInfo,
+                                  OperandValueProperties Opd2PropInfo,
+                                  ArrayRef<const Value *> Args,
+                                  const Instruction *CxtI = nullptr) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
-                                       Opd1PropInfo, Opd2PropInfo, Args);
+                                       Opd1PropInfo, Opd2PropInfo, Args, CxtI);
  }
  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                     Type *SubTp) override {
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@ -430,7 +430,8 @@ public:
                                  TTI::OperandValueKind Opd2Info,
                                  TTI::OperandValueProperties Opd1PropInfo,
                                  TTI::OperandValueProperties Opd2PropInfo,
-                                  ArrayRef<const Value *> Args) {
+                                  ArrayRef<const Value *> Args,
+                                  const Instruction *CxtI = nullptr) {
    return 1;
  }

--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@ -633,7 +633,8 @@ public:
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
-      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+      const Instruction *CxtI = nullptr) {
    // Check if any of the operands are vector operands.
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@ -129,7 +129,7 @@ public:
  bool isUnaryOp() const { return isUnaryOp(getOpcode()); }
  bool isBinaryOp() const { return isBinaryOp(getOpcode()); }
  bool isIntDivRem() const { return isIntDivRem(getOpcode()); }
-  bool isShift() { return isShift(getOpcode()); }
+  bool isShift() const { return isShift(getOpcode()); }
  bool isCast() const { return isCast(getOpcode()); }
  bool isFuncletPad() const { return isFuncletPad(getOpcode()); }
  bool isExceptionalTerminator() const {
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@ -592,10 +592,10 @@ TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
 int TargetTransformInfo::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
-    OperandValueProperties Opd2PropInfo,
-    ArrayRef<const Value *> Args) const {
-  int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
-                                             Opd1PropInfo, Opd2PropInfo, Args);
+    OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
+    const Instruction *CxtI) const {
+  int Cost = TTIImpl->getArithmeticInstrCost(
+      Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
 }
@ -1183,7 +1183,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
    Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
-                                  Op1VP, Op2VP, Operands);
+                                  Op1VP, Op2VP, Operands, I);
  }
  case Instruction::FNeg: {
    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
@ -1193,7 +1193,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
    Op2VP = OP_None;
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
-                                  Op1VP, Op2VP, Operands);
+                                  Op1VP, Op2VP, Operands, I);
  }
  case Instruction::Select: {
    const SelectInst *SI = cast<SelectInst>(I);
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@ -484,7 +484,8 @@ int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
 int AArch64TTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
    TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
-    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
+    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
+    const Instruction *CxtI) {
  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@ -124,7 +124,8 @@ public:
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
-      ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+      const Instruction *CxtI = nullptr);

  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);

--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@ -338,10 +338,13 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
  }
 }

-int GCNTTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
-    TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
-    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args ) {
+int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                       TTI::OperandValueKind Opd1Info,
+                                       TTI::OperandValueKind Opd2Info,
+                                       TTI::OperandValueProperties Opd1PropInfo,
+                                       TTI::OperandValueProperties Opd2PropInfo,
+                                       ArrayRef<const Value *> Args,
+                                       const Instruction *CxtI) {
  EVT OrigTy = TLI->getValueType(DL, Ty);
  if (!OrigTy.isSimple()) {
    return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
@ -801,7 +804,7 @@ unsigned GCNTTIImpl::getUserCost(const User *U,
  case Instruction::FNeg: {
    return getArithmeticInstrCost(I->getOpcode(), I->getType(),
                                  TTI::OK_AnyValue, TTI::OK_AnyValue,
-                                  TTI::OP_None, TTI::OP_None, Operands);
+                                  TTI::OP_None, TTI::OP_None, Operands, I);
  }
  default:
    break;
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@ -172,12 +172,13 @@ public:
  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  int getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty,
-    TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
-    TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
-    TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
-    TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
-    ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+      unsigned Opcode, Type *Ty,
+      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+      const Instruction *CxtI = nullptr);

  unsigned getCFInstrCost(unsigned Opcode);

--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@ -642,11 +642,13 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
  return BaseCost * BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
 }

-int ARMTTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
-    TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
-    TTI::OperandValueProperties Opd2PropInfo,
-    ArrayRef<const Value *> Args) {
+int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                       TTI::OperandValueKind Op1Info,
+                                       TTI::OperandValueKind Op2Info,
+                                       TTI::OperandValueProperties Opd1PropInfo,
+                                       TTI::OperandValueProperties Opd2PropInfo,
+                                       ArrayRef<const Value *> Args,
+                                       const Instruction *CxtI) {
  int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

@ -714,6 +716,33 @@ int ARMTTIImpl::getArithmeticInstrCost(
    return Cost;
  }

+  // If this operation is a shift on arm/thumb2, it might well be folded into
+  // the following instruction, hence having a cost of 0.
+  auto LooksLikeAFreeShift = [&]() {
+    if (ST->isThumb1Only() || Ty->isVectorTy())
+      return false;
+
+    if (!CxtI || !CxtI->hasOneUse() || !CxtI->isShift())
+      return false;
+    if (Op2Info != TargetTransformInfo::OK_UniformConstantValue)
+      return false;
+
+    // Folded into a ADC/ADD/AND/BIC/CMP/EOR/MVN/ORR/ORN/RSB/SBC/SUB
+    switch (cast<Instruction>(CxtI->user_back())->getOpcode()) {
+    case Instruction::Add:
+    case Instruction::Sub:
+    case Instruction::And:
+    case Instruction::Xor:
+    case Instruction::Or:
+    case Instruction::ICmp:
+      return true;
+    default:
+      return false;
+    }
+  };
+  if (LooksLikeAFreeShift())
+    return 0;
+
  int BaseCost = ST->hasMVEIntegerOps() && Ty->isVectorTy()
                     ? ST->getMVEVectorCostFactor()
                     : 1;
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@ -187,7 +187,8 @@ public:
      TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
-      ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+      const Instruction *CxtI = nullptr);

  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                      unsigned AddressSpace, const Instruction *I = nullptr);
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@ -236,17 +236,18 @@ unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
 }

-unsigned HexagonTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
-      TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
-      TTI::OperandValueProperties Opd1PropInfo,
-      TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value*> Args) {
+unsigned HexagonTTIImpl::getArithmeticInstrCost(
+    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+    TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
+    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
+    const Instruction *CxtI) {
  if (Ty->isVectorTy()) {
    std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
    if (LT.second.isFloatingPoint())
      return LT.first + FloatFactor * getTypeNumElements(Ty);
  }
  return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
-                                       Opd1PropInfo, Opd2PropInfo, Args);
+                                       Opd1PropInfo, Opd2PropInfo, Args, CxtI);
 }

 unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@ -127,12 +127,14 @@ public:
            bool UseMaskForGaps = false);
  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
            const Instruction *I);
-  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
-            TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
-            TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
-            TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
-            TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
-            ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+  unsigned getArithmeticInstrCost(
+      unsigned Opcode, Type *Ty,
+      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+      const Instruction *CxtI = nullptr);
  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
            const Instruction *I = nullptr);
  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
--- a/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
+++ b/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
@ -81,7 +81,8 @@ public:
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
-      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+      const Instruction *CxtI = nullptr) {
    int ISD = TLI->InstructionOpcodeToISD(Opcode);

    switch (ISD) {
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@ -113,7 +113,8 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
 int NVPTXTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
    TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
-    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
+    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
+    const Instruction *CxtI) {
  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@ -91,7 +91,8 @@ public:
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
-      ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+      const Instruction *CxtI = nullptr);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@ -722,10 +722,13 @@ int PPCTTIImpl::vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1,
  return Cost * 2;
 }

-int PPCTTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
-    TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
-    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
+int PPCTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                       TTI::OperandValueKind Op1Info,
+                                       TTI::OperandValueKind Op2Info,
+                                       TTI::OperandValueProperties Opd1PropInfo,
+                                       TTI::OperandValueProperties Opd2PropInfo,
+                                       ArrayRef<const Value *> Args,
+                                       const Instruction *CxtI) {
  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");

  // Fallback to the default implementation.
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@ -90,7 +90,8 @@ public:
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
-      ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+      const Instruction *CxtI = nullptr);
  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I = nullptr);
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@ -348,11 +348,10 @@ static unsigned getNumVectorRegs(Type *Ty) {
 }

 int SystemZTTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty,
-    TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
-    TTI::OperandValueProperties Opd1PropInfo,
-    TTI::OperandValueProperties Opd2PropInfo,
-    ArrayRef<const Value *> Args) {
+    unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
+    TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
+    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
+    const Instruction *CxtI) {

  // TODO: return a good value for BB-VECTORIZER that includes the
  // immediate loads, which we do not want to count for the loop
@ -508,7 +507,7 @@ int SystemZTTIImpl::getArithmeticInstrCost(

  // Fallback to the default implementation.
  return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
-                                       Opd1PropInfo, Opd2PropInfo, Args);
+                                       Opd1PropInfo, Opd2PropInfo, Args, CxtI);
 }

 int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@ -75,7 +75,8 @@ public:
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
-      ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+      const Instruction *CxtI = nullptr);
  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
  unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
  unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@ -46,7 +46,8 @@ unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) const {
 unsigned WebAssemblyTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
    TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
-    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
+    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
+    const Instruction *CxtI) {

  unsigned Cost = BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
      Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@ -61,7 +61,8 @@ public:
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
-      ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+      const Instruction *CxtI = nullptr);
  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);

  /// @}
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@ -169,12 +169,13 @@ unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
  return 2;
 }

-int X86TTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty,
-    TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
-    TTI::OperandValueProperties Opd1PropInfo,
-    TTI::OperandValueProperties Opd2PropInfo,
-    ArrayRef<const Value *> Args) {
+int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                       TTI::OperandValueKind Op1Info,
+                                       TTI::OperandValueKind Op2Info,
+                                       TTI::OperandValueProperties Opd1PropInfo,
+                                       TTI::OperandValueProperties Opd2PropInfo,
+                                       ArrayRef<const Value *> Args,
+                                       const Instruction *CxtI) {
  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@ -125,7 +125,8 @@ public:
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
-      ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+      const Instruction *CxtI = nullptr);
  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I = nullptr);
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@ -6234,7 +6234,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
    unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
    return N * TTI.getArithmeticInstrCost(
                   I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
-                   Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands);
+                   Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I);
  }
  case Instruction::FNeg: {
    unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
@ -6242,7 +6242,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
                   I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
                   TargetTransformInfo::OK_AnyValue,
                   TargetTransformInfo::OP_None, TargetTransformInfo::OP_None,
-                   I->getOperand(0));
+                   I->getOperand(0), I);
  }
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@ -3420,13 +3420,13 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {

      SmallVector<const Value *, 4> Operands(VL0->operand_values());
      int ScalarEltCost = TTI->getArithmeticInstrCost(
-          E->getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands);
+          E->getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands, VL0);
      if (NeedToShuffleReuses) {
        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
      }
      int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
-      int VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, Op1VK,
-                                                Op2VK, Op1VP, Op2VP, Operands);
+      int VecCost = TTI->getArithmeticInstrCost(
+          E->getOpcode(), VecTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands, VL0);
      return ReuseShuffleCost + VecCost - ScalarCost;
    }
    case Instruction::GetElementPtr: {
--- a/llvm/test/Analysis/CostModel/ARM/freeshift.ll
+++ b/llvm/test/Analysis/CostModel/ARM/freeshift.ll
@ -3,17 +3,17 @@

 define void @shl(i32 %a, i32 %b) {
 ; CHECK-LABEL: 'shl'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %as = shl i32 %a, 3
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ss = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ss = shl i32 %a, 3
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xs = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %xs = shl i32 %a, 3
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ns = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ns = shl i32 %a, 3
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %os = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %os = shl i32 %a, 3
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %is = shl i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %is = shl i32 %a, 3
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@ -34,17 +34,17 @@ define void @shl(i32 %a, i32 %b) {

 define void @ashr(i32 %a, i32 %b) {
 ; CHECK-LABEL: 'ashr'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %as = ashr i32 %a, 3
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ss = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ss = ashr i32 %a, 3
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xs = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %xs = ashr i32 %a, 3
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ns = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ns = ashr i32 %a, 3
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %os = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %os = ashr i32 %a, 3
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %is = ashr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %is = ashr i32 %a, 3
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
@ -65,17 +65,17 @@ define void @ashr(i32 %a, i32 %b) {

 define void @lshr(i32 %a, i32 %b) {
 ; CHECK-LABEL: 'lshr'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %as = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %as = lshr i32 %a, 3
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ac = add i32 %b, %as
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ss = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ss = lshr i32 %a, 3
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sc = sub i32 %b, %ss
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xs = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %xs = lshr i32 %a, 3
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %xc = xor i32 %b, %xs
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ns = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %ns = lshr i32 %a, 3
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %nc = and i32 %b, %ns
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %os = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %os = lshr i32 %a, 3
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %oc = or i32 %b, %os
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %is = lshr i32 %a, 3
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %is = lshr i32 %a, 3
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ic = icmp eq i32 %b, %is
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll
@ -5,11 +5,11 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-arm-none-eabi"

 ; CHECK-LABEL: test
-; CHECK-COST: LV: Found an estimated cost of 1 for VF 1 For instruction:   %and515 = shl i32 %l41, 3
+; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction:   %and515 = shl i32 %l41, 3
 ; CHECK-COST: LV: Found an estimated cost of 1 for VF 1 For instruction:   %l45 = and i32 %and515, 131072
 ; CHECK-COST: LV: Found an estimated cost of 2 for VF 4 For instruction:   %and515 = shl i32 %l41, 3
 ; CHECK-COST: LV: Found an estimated cost of 2 for VF 4 For instruction:   %l45 = and i32 %and515, 131072
-; CHECK: vector.body
+; CHECK-NOT: vector.body

 define void @test([101 x i32] *%src, i32 %N) #0 {
 entry: