forked from OSchip/llvm-project
Revert "[VP,Integer,#2] ExpandVectorPredication pass"
This reverts commit 43bc584dc0.
The commit broke the -DLLVM_ENABLE_MODULES=1 builds.
http://green.lab.llvm.org/green/view/LLDB/job/lldb-cmake/31603/consoleFull#2136199809a1ca8a51-895e-46c6-af87-ce24fa4cd561
This commit is contained in:
parent
b11e4c9907
commit
02c5ba8679
@@ -61,7 +61,6 @@ class TargetLibraryInfo;
class Type;
class User;
class Value;
class VPIntrinsic;
struct KnownBits;
template <typename T> class Optional;
@@ -1380,38 +1379,6 @@ public:
  /// Intrinsics") Use of %evl is discouraged when that is not the case.
  bool hasActiveVectorLength() const;

  struct VPLegalization {
    enum VPTransform {
      // keep the predicating parameter
      Legal = 0,
      // where legal, discard the predicate parameter
      Discard = 1,
      // transform into something else that is also predicating
      Convert = 2
    };

    // How to transform the EVL parameter.
    // Legal: keep the EVL parameter as it is.
    // Discard: Ignore the EVL parameter where it is safe to do so.
    // Convert: Fold the EVL into the mask parameter.
    VPTransform EVLParamStrategy;

    // How to transform the operator.
    // Legal: The target supports this operator.
    // Convert: Convert this to a non-VP operation.
    // The 'Discard' strategy is invalid.
    VPTransform OpStrategy;

    bool shouldDoNothing() const {
      return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
    }
    VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
        : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
  };

  /// \returns How the target needs this vector-predicated operation to be
  /// transformed.
  VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
  /// @}

  /// @}
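For context on how the hook above is consumed: a backend answers getVPLegalizationStrategy() with one VPLegalization per intrinsic. The snippet below is a minimal illustrative sketch, not part of this patch; the boolean parameter stands in for whatever subtarget feature query a real target would use.

#include "llvm/Analysis/TargetTransformInfo.h"

// Illustrative sketch only (not from this commit): a target-side helper that
// picks a VPLegalization based on a hypothetical subtarget feature flag.
static llvm::TargetTransformInfo::VPLegalization
pickVPStrategy(bool TargetHasNativePredication) {
  using VPLegalization = llvm::TargetTransformInfo::VPLegalization;
  if (TargetHasNativePredication)
    // Keep both the %mask and the %evl operand on the VP intrinsic.
    return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
  // Otherwise fold %evl into %mask and lower to unpredicated IR.
  return VPLegalization(VPLegalization::Convert, VPLegalization::Convert);
}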
@@ -1721,8 +1688,6 @@ public:
  virtual bool supportsScalableVectors() const = 0;
  virtual bool hasActiveVectorLength() const = 0;
  virtual InstructionCost getInstructionLatency(const Instruction *I) = 0;
  virtual VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
};

template <typename T>
@@ -2294,11 +2259,6 @@ public:
  InstructionCost getInstructionLatency(const Instruction *I) override {
    return Impl.getInstructionLatency(I);
  }

  VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
    return Impl.getVPLegalizationStrategy(PI);
  }
};

template <typename T>
@@ -750,13 +750,6 @@ public:

  bool hasActiveVectorLength() const { return false; }

  TargetTransformInfo::VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const {
    return TargetTransformInfo::VPLegalization(
        /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
        /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
  }

protected:
  // Obtain the minimum required size to hold the value (without the sign)
  // In case of a vector it returns the min required size for one element.
@@ -1,23 +0,0 @@
//===-- ExpandVectorPredication.h - Expand vector predication ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_EXPANDVECTORPREDICATION_H
#define LLVM_CODEGEN_EXPANDVECTORPREDICATION_H

#include "llvm/IR/PassManager.h"

namespace llvm {

class ExpandVectorPredicationPass
    : public PassInfoMixin<ExpandVectorPredicationPass> {
public:
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
} // end namespace llvm

#endif // LLVM_CODEGEN_EXPANDVECTORPREDICATION_H
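The header above only declares the new-pass-manager wrapper. A minimal sketch of driving it over a single function, assuming the usual PassBuilder setup (illustrative only, not part of this patch):

#include "llvm/CodeGen/ExpandVectorPredication.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"

static void runExpandVP(llvm::Function &F) {
  llvm::FunctionAnalysisManager FAM;
  llvm::PassBuilder PB;
  // Registers TargetIRAnalysis, which the pass queries for the per-intrinsic
  // VPLegalization strategy.
  PB.registerFunctionAnalyses(FAM);

  llvm::FunctionPassManager FPM;
  FPM.addPass(llvm::ExpandVectorPredicationPass());
  (void)FPM.run(F, FAM);
}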
@@ -103,7 +103,6 @@ MACHINE_FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (
#define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)
#endif
DUMMY_FUNCTION_PASS("expandmemcmp", ExpandMemCmpPass, ())
DUMMY_FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ())
DUMMY_FUNCTION_PASS("gc-lowering", GCLoweringPass, ())
DUMMY_FUNCTION_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass, ())
DUMMY_FUNCTION_PASS("sjljehprepare", SjLjEHPreparePass, ())
@@ -453,11 +453,6 @@ namespace llvm {
  // the corresponding function in a vector library (e.g., SVML, libmvec).
  FunctionPass *createReplaceWithVeclibLegacyPass();

  /// This pass expands the vector predication intrinsics into unpredicated
  /// instructions with selects or just the explicit vector length into the
  /// predicate mask.
  FunctionPass *createExpandVectorPredicationPass();

  // This pass expands memcmp() to load/stores.
  FunctionPass *createExpandMemCmpPass();
@@ -400,11 +400,9 @@ public:

  /// \return the mask parameter or nullptr.
  Value *getMaskParam() const;
  void setMaskParam(Value *);

  /// \return the vector length parameter or nullptr.
  Value *getVectorLengthParam() const;
  void setVectorLengthParam(Value *);

  /// \return whether the vector length param can be ignored.
  bool canIgnoreVectorLengthParam() const;
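As a usage note for the accessors above: a transform normally checks whether the %evl operand is still meaningful before rewriting a VP call. A small sketch, with a helper name invented for illustration:

#include "llvm/IR/IntrinsicInst.h"

// Illustrative helper (not part of this patch): true when the call still
// depends on its %evl operand, i.e. the operand exists and does not already
// cover the whole vector.
static bool evlStillMatters(const llvm::VPIntrinsic &VPI) {
  if (!VPI.getVectorLengthParam())
    return false;
  return !VPI.canIgnoreVectorLengthParam();
}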
@@ -154,7 +154,6 @@ void initializeEntryExitInstrumenterPass(PassRegistry&);
void initializeExpandMemCmpPassPass(PassRegistry&);
void initializeExpandPostRAPass(PassRegistry&);
void initializeExpandReductionsPass(PassRegistry&);
void initializeExpandVectorPredicationPass(PassRegistry &);
void initializeMakeGuardsExplicitLegacyPassPass(PassRegistry&);
void initializeExternalAAWrapperPassPass(PassRegistry&);
void initializeFEntryInserterPass(PassRegistry&);
@@ -197,7 +197,6 @@ namespace {
      (void) llvm::createMergeFunctionsPass();
      (void) llvm::createMergeICmpsLegacyPass();
      (void) llvm::createExpandMemCmpPass();
      (void) llvm::createExpandVectorPredicationPass();
      std::string buf;
      llvm::raw_string_ostream os(buf);
      (void) llvm::createPrintModulePass(os);
@@ -1026,11 +1026,6 @@ bool TargetTransformInfo::preferPredicatedReductionSelect(
  return TTIImpl->preferPredicatedReductionSelect(Opcode, Ty, Flags);
}

TargetTransformInfo::VPLegalization
TargetTransformInfo::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
  return TTIImpl->getVPLegalizationStrategy(VPI);
}

bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
  return TTIImpl->shouldExpandReduction(II);
}
@@ -29,7 +29,6 @@ add_llvm_component_library(LLVMCodeGen
  ExpandMemCmp.cpp
  ExpandPostRAPseudos.cpp
  ExpandReductions.cpp
  ExpandVectorPredication.cpp
  FaultMaps.cpp
  FEntryInserter.cpp
  FinalizeISel.cpp
@@ -1,469 +0,0 @@
//===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass implements IR expansion for vector predication intrinsics, allowing
|
||||
// targets to enable vector predication until just before codegen.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/CodeGen/ExpandVectorPredication.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/InstIterator.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
using VPLegalization = TargetTransformInfo::VPLegalization;
|
||||
using VPTransform = TargetTransformInfo::VPLegalization::VPTransform;
|
||||
|
||||
// Keep this in sync with TargetTransformInfo::VPLegalization.
|
||||
#define VPINTERNAL_VPLEGAL_CASES \
|
||||
VPINTERNAL_CASE(Legal) \
|
||||
VPINTERNAL_CASE(Discard) \
|
||||
VPINTERNAL_CASE(Convert)
|
||||
|
||||
#define VPINTERNAL_CASE(X) "|" #X
|
||||
|
||||
// Override options.
|
||||
static cl::opt<std::string> EVLTransformOverride(
|
||||
"expandvp-override-evl-transform", cl::init(""), cl::Hidden,
|
||||
cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
|
||||
". If non-empty, ignore "
|
||||
"TargetTransformInfo and "
|
||||
"always use this transformation for the %evl parameter (Used in "
|
||||
"testing)."));
|
||||
|
||||
static cl::opt<std::string> MaskTransformOverride(
|
||||
"expandvp-override-mask-transform", cl::init(""), cl::Hidden,
|
||||
cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
|
||||
". If non-empty, Ignore "
|
||||
"TargetTransformInfo and "
|
||||
"always use this transformation for the %mask parameter (Used in "
|
||||
"testing)."));
|
||||
|
||||
#undef VPINTERNAL_CASE
|
||||
#define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X)
|
||||
|
||||
static VPTransform parseOverrideOption(const std::string &TextOpt) {
|
||||
return StringSwitch<VPTransform>(TextOpt) VPINTERNAL_VPLEGAL_CASES;
|
||||
}
|
||||
|
||||
#undef VPINTERNAL_VPLEGAL_CASES
|
||||
|
||||
// Whether any override options are set.
|
||||
static bool anyExpandVPOverridesSet() {
|
||||
return !EVLTransformOverride.empty() || !MaskTransformOverride.empty();
|
||||
}
|
||||
|
||||
#define DEBUG_TYPE "expandvp"
|
||||
|
||||
STATISTIC(NumFoldedVL, "Number of folded vector length params");
|
||||
STATISTIC(NumLoweredVPOps, "Number of folded vector predication operations");
|
||||
|
||||
///// Helpers {
|
||||
|
||||
/// \returns Whether the vector mask \p MaskVal has all lane bits set.
|
||||
static bool isAllTrueMask(Value *MaskVal) {
|
||||
auto *ConstVec = dyn_cast<ConstantVector>(MaskVal);
|
||||
return ConstVec && ConstVec->isAllOnesValue();
|
||||
}
|
||||
|
||||
/// \returns A non-excepting divisor constant for this type.
|
||||
static Constant *getSafeDivisor(Type *DivTy) {
|
||||
assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type");
|
||||
return ConstantInt::get(DivTy, 1u, false);
|
||||
}
|
||||
|
||||
/// Transfer operation properties from \p OldVPI to \p NewVal.
|
||||
static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) {
|
||||
auto *NewInst = dyn_cast<Instruction>(&NewVal);
|
||||
if (!NewInst || !isa<FPMathOperator>(NewVal))
|
||||
return;
|
||||
|
||||
auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI);
|
||||
if (!OldFMOp)
|
||||
return;
|
||||
|
||||
NewInst->setFastMathFlags(OldFMOp->getFastMathFlags());
|
||||
}
|
||||
|
||||
/// Transfer all properties from \p OldOp to \p NewOp and replace all uses.
|
||||
/// OldVP gets erased.
|
||||
static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
|
||||
transferDecorations(NewOp, OldOp);
|
||||
OldOp.replaceAllUsesWith(&NewOp);
|
||||
OldOp.eraseFromParent();
|
||||
}
|
||||
|
||||
//// } Helpers
|
||||
|
||||
namespace {
|
||||
|
||||
// Expansion pass state at function scope.
|
||||
struct CachingVPExpander {
|
||||
Function &F;
|
||||
const TargetTransformInfo &TTI;
|
||||
|
||||
/// \returns A (fixed length) vector with ascending integer indices
|
||||
/// (<0, 1, ..., NumElems-1>).
|
||||
/// \p Builder
|
||||
/// Used for instruction creation.
|
||||
/// \p LaneTy
|
||||
/// Integer element type of the result vector.
|
||||
/// \p NumElems
|
||||
/// Number of vector elements.
|
||||
Value *createStepVector(IRBuilder<> &Builder, Type *LaneTy,
|
||||
unsigned NumElems);
|
||||
|
||||
/// \returns A bitmask that is true where the lane position is less-than \p
|
||||
/// EVLParam
|
||||
///
|
||||
/// \p Builder
|
||||
/// Used for instruction creation.
|
||||
/// \p VLParam
|
||||
/// The explicit vector length parameter to test against the lane
|
||||
/// positions.
|
||||
/// \p ElemCount
|
||||
/// Static (potentially scalable) number of vector elements.
|
||||
Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam,
|
||||
ElementCount ElemCount);
|
||||
|
||||
Value *foldEVLIntoMask(VPIntrinsic &VPI);
|
||||
|
||||
/// "Remove" the %evl parameter of \p PI by setting it to the static vector
|
||||
/// length of the operation.
|
||||
void discardEVLParameter(VPIntrinsic &PI);
|
||||
|
||||
/// \brief Lower this VP binary operator to an unpredicated binary operator.
|
||||
Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
|
||||
VPIntrinsic &PI);
|
||||
|
||||
/// \brief Query TTI and expand the vector predication in \p P accordingly.
|
||||
Value *expandPredication(VPIntrinsic &PI);
|
||||
|
||||
/// \brief Determine how and whether the VPIntrinsic \p VPI shall be
|
||||
/// expanded. This overrides TTI with the cl::opts listed at the top of this
|
||||
/// file.
|
||||
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const;
|
||||
bool UsingTTIOverrides;
|
||||
|
||||
public:
|
||||
CachingVPExpander(Function &F, const TargetTransformInfo &TTI)
|
||||
: F(F), TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {}
|
||||
|
||||
bool expandVectorPredication();
|
||||
};
|
||||
|
||||
//// CachingVPExpander {
|
||||
|
||||
Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy,
|
||||
unsigned NumElems) {
|
||||
// TODO add caching
|
||||
SmallVector<Constant *, 16> ConstElems;
|
||||
|
||||
for (unsigned Idx = 0; Idx < NumElems; ++Idx)
|
||||
ConstElems.push_back(ConstantInt::get(LaneTy, Idx, false));
|
||||
|
||||
return ConstantVector::get(ConstElems);
|
||||
}
|
||||
|
||||
Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
|
||||
Value *EVLParam,
|
||||
ElementCount ElemCount) {
|
||||
// TODO add caching
|
||||
// Scalable vector %evl conversion.
|
||||
if (ElemCount.isScalable()) {
|
||||
auto *M = Builder.GetInsertBlock()->getModule();
|
||||
Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount);
|
||||
Function *ActiveMaskFunc = Intrinsic::getDeclaration(
|
||||
M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->getType()});
|
||||
// `get_active_lane_mask` performs an implicit less-than comparison.
|
||||
Value *ConstZero = Builder.getInt32(0);
|
||||
return Builder.CreateCall(ActiveMaskFunc, {ConstZero, EVLParam});
|
||||
}
|
||||
|
||||
// Fixed vector %evl conversion.
|
||||
Type *LaneTy = EVLParam->getType();
|
||||
unsigned NumElems = ElemCount.getFixedValue();
|
||||
Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam);
|
||||
Value *IdxVec = createStepVector(Builder, LaneTy, NumElems);
|
||||
return Builder.CreateICmp(CmpInst::ICMP_ULT, IdxVec, VLSplat);
|
||||
}
|
||||
|
||||
Value *
|
||||
CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
|
||||
VPIntrinsic &VPI) {
|
||||
assert((isSafeToSpeculativelyExecute(&VPI) ||
|
||||
VPI.canIgnoreVectorLengthParam()) &&
|
||||
"Implicitly dropping %evl in non-speculatable operator!");
|
||||
|
||||
auto OC = static_cast<Instruction::BinaryOps>(VPI.getFunctionalOpcode());
|
||||
assert(Instruction::isBinaryOp(OC));
|
||||
|
||||
Value *Op0 = VPI.getOperand(0);
|
||||
Value *Op1 = VPI.getOperand(1);
|
||||
Value *Mask = VPI.getMaskParam();
|
||||
|
||||
// Blend in safe operands.
|
||||
if (Mask && !isAllTrueMask(Mask)) {
|
||||
switch (OC) {
|
||||
default:
|
||||
// Can safely ignore the predicate.
|
||||
break;
|
||||
|
||||
// Division operators need a safe divisor on masked-off lanes (1).
|
||||
case Instruction::UDiv:
|
||||
case Instruction::SDiv:
|
||||
case Instruction::URem:
|
||||
case Instruction::SRem:
|
||||
// 2nd operand must not be zero.
|
||||
Value *SafeDivisor = getSafeDivisor(VPI.getType());
|
||||
Op1 = Builder.CreateSelect(Mask, Op1, SafeDivisor);
|
||||
}
|
||||
}
|
||||
|
||||
Value *NewBinOp = Builder.CreateBinOp(OC, Op0, Op1, VPI.getName());
|
||||
|
||||
replaceOperation(*NewBinOp, VPI);
|
||||
return NewBinOp;
|
||||
}
|
||||
|
||||
void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
|
||||
LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");
|
||||
|
||||
if (VPI.canIgnoreVectorLengthParam())
|
||||
return;
|
||||
|
||||
Value *EVLParam = VPI.getVectorLengthParam();
|
||||
if (!EVLParam)
|
||||
return;
|
||||
|
||||
ElementCount StaticElemCount = VPI.getStaticVectorLength();
|
||||
Value *MaxEVL = nullptr;
|
||||
Type *Int32Ty = Type::getInt32Ty(VPI.getContext());
|
||||
if (StaticElemCount.isScalable()) {
|
||||
// TODO add caching
|
||||
auto *M = VPI.getModule();
|
||||
Function *VScaleFunc =
|
||||
Intrinsic::getDeclaration(M, Intrinsic::vscale, Int32Ty);
|
||||
IRBuilder<> Builder(VPI.getParent(), VPI.getIterator());
|
||||
Value *FactorConst = Builder.getInt32(StaticElemCount.getKnownMinValue());
|
||||
Value *VScale = Builder.CreateCall(VScaleFunc, {}, "vscale");
|
||||
MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size",
|
||||
/*NUW*/ true, /*NSW*/ false);
|
||||
} else {
|
||||
MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.getFixedValue(), false);
|
||||
}
|
||||
VPI.setVectorLengthParam(MaxEVL);
|
||||
}
|
||||
|
||||
Value *CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) {
|
||||
LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n');
|
||||
|
||||
IRBuilder<> Builder(&VPI);
|
||||
|
||||
// Ineffective %evl parameter and so nothing to do here.
|
||||
if (VPI.canIgnoreVectorLengthParam())
|
||||
return &VPI;
|
||||
|
||||
// Only VP intrinsics can have an %evl parameter.
|
||||
Value *OldMaskParam = VPI.getMaskParam();
|
||||
Value *OldEVLParam = VPI.getVectorLengthParam();
|
||||
assert(OldMaskParam && "no mask param to fold the vl param into");
|
||||
assert(OldEVLParam && "no EVL param to fold away");
|
||||
|
||||
LLVM_DEBUG(dbgs() << "OLD evl: " << *OldEVLParam << '\n');
|
||||
LLVM_DEBUG(dbgs() << "OLD mask: " << *OldMaskParam << '\n');
|
||||
|
||||
// Convert the %evl predication into vector mask predication.
|
||||
ElementCount ElemCount = VPI.getStaticVectorLength();
|
||||
Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount);
|
||||
Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam);
|
||||
VPI.setMaskParam(NewMaskParam);
|
||||
|
||||
// Drop the %evl parameter.
|
||||
discardEVLParameter(VPI);
|
||||
assert(VPI.canIgnoreVectorLengthParam() &&
|
||||
"transformation did not render the evl param ineffective!");
|
||||
|
||||
// Reassess the modified instruction.
|
||||
return &VPI;
|
||||
}
|
||||
|
||||
Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
|
||||
LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n');
|
||||
|
||||
IRBuilder<> Builder(&VPI);
|
||||
|
||||
// Try lowering to a LLVM instruction first.
|
||||
unsigned OC = VPI.getFunctionalOpcode();
|
||||
|
||||
if (Instruction::isBinaryOp(OC))
|
||||
return expandPredicationInBinaryOperator(Builder, VPI);
|
||||
|
||||
return &VPI;
|
||||
}
|
||||
|
||||
//// } CachingVPExpander
|
||||
|
||||
struct TransformJob {
|
||||
VPIntrinsic *PI;
|
||||
TargetTransformInfo::VPLegalization Strategy;
|
||||
TransformJob(VPIntrinsic *PI, TargetTransformInfo::VPLegalization InitStrat)
|
||||
: PI(PI), Strategy(InitStrat) {}
|
||||
|
||||
bool isDone() const { return Strategy.shouldDoNothing(); }
|
||||
};
|
||||
|
||||
void sanitizeStrategy(Instruction &I, VPLegalization &LegalizeStrat) {
|
||||
// Speculatable instructions do not strictly need predication.
|
||||
if (isSafeToSpeculativelyExecute(&I)) {
|
||||
// Converting a speculatable VP intrinsic means dropping %mask and %evl.
|
||||
// No need to expand %evl into the %mask only to ignore that code.
|
||||
if (LegalizeStrat.OpStrategy == VPLegalization::Convert)
|
||||
LegalizeStrat.EVLParamStrategy = VPLegalization::Discard;
|
||||
return;
|
||||
}
|
||||
|
||||
// We have to preserve the predicating effect of %evl for this
|
||||
// non-speculatable VP intrinsic.
|
||||
// 1) Never discard %evl.
|
||||
// 2) If this VP intrinsic will be expanded to non-VP code, make sure that
|
||||
// %evl gets folded into %mask.
|
||||
if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) ||
|
||||
(LegalizeStrat.OpStrategy == VPLegalization::Convert)) {
|
||||
LegalizeStrat.EVLParamStrategy = VPLegalization::Convert;
|
||||
}
|
||||
}
|
||||
|
||||
VPLegalization
|
||||
CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
|
||||
auto VPStrat = TTI.getVPLegalizationStrategy(VPI);
|
||||
if (LLVM_LIKELY(!UsingTTIOverrides)) {
|
||||
// No overrides - we are in production.
|
||||
return VPStrat;
|
||||
}
|
||||
|
||||
// Overrides set - we are in testing, the following does not need to be
|
||||
// efficient.
|
||||
VPStrat.EVLParamStrategy = parseOverrideOption(EVLTransformOverride);
|
||||
VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride);
|
||||
return VPStrat;
|
||||
}
|
||||
|
||||
/// \brief Expand llvm.vp.* intrinsics as requested by \p TTI.
|
||||
bool CachingVPExpander::expandVectorPredication() {
|
||||
SmallVector<TransformJob, 16> Worklist;
|
||||
|
||||
// Collect all VPIntrinsics that need expansion and determine their expansion
|
||||
// strategy.
|
||||
for (auto &I : instructions(F)) {
|
||||
auto *VPI = dyn_cast<VPIntrinsic>(&I);
|
||||
if (!VPI)
|
||||
continue;
|
||||
auto VPStrat = getVPLegalizationStrategy(*VPI);
|
||||
sanitizeStrategy(I, VPStrat);
|
||||
if (!VPStrat.shouldDoNothing())
|
||||
Worklist.emplace_back(VPI, VPStrat);
|
||||
}
|
||||
if (Worklist.empty())
|
||||
return false;
|
||||
|
||||
// Transform all VPIntrinsics on the worklist.
|
||||
LLVM_DEBUG(dbgs() << "\n:::: Transforming " << Worklist.size()
|
||||
<< " instructions ::::\n");
|
||||
for (TransformJob Job : Worklist) {
|
||||
// Transform the EVL parameter.
|
||||
switch (Job.Strategy.EVLParamStrategy) {
|
||||
case VPLegalization::Legal:
|
||||
break;
|
||||
case VPLegalization::Discard:
|
||||
discardEVLParameter(*Job.PI);
|
||||
break;
|
||||
case VPLegalization::Convert:
|
||||
if (foldEVLIntoMask(*Job.PI))
|
||||
++NumFoldedVL;
|
||||
break;
|
||||
}
|
||||
Job.Strategy.EVLParamStrategy = VPLegalization::Legal;
|
||||
|
||||
// Replace with a non-predicated operation.
|
||||
switch (Job.Strategy.OpStrategy) {
|
||||
case VPLegalization::Legal:
|
||||
break;
|
||||
case VPLegalization::Discard:
|
||||
llvm_unreachable("Invalid strategy for operators.");
|
||||
case VPLegalization::Convert:
|
||||
expandPredication(*Job.PI);
|
||||
++NumLoweredVPOps;
|
||||
break;
|
||||
}
|
||||
Job.Strategy.OpStrategy = VPLegalization::Legal;
|
||||
|
||||
assert(Job.isDone() && "incomplete transformation");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
class ExpandVectorPredication : public FunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
ExpandVectorPredication() : FunctionPass(ID) {
|
||||
initializeExpandVectorPredicationPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
bool runOnFunction(Function &F) override {
|
||||
const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
|
||||
CachingVPExpander VPExpander(F, *TTI);
|
||||
return VPExpander.expandVectorPredication();
|
||||
}
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<TargetTransformInfoWrapperPass>();
|
||||
AU.setPreservesCFG();
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
char ExpandVectorPredication::ID;
|
||||
INITIALIZE_PASS_BEGIN(ExpandVectorPredication, "expandvp",
|
||||
"Expand vector predication intrinsics", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
||||
INITIALIZE_PASS_END(ExpandVectorPredication, "expandvp",
|
||||
"Expand vector predication intrinsics", false, false)
|
||||
|
||||
FunctionPass *llvm::createExpandVectorPredicationPass() {
|
||||
return new ExpandVectorPredication();
|
||||
}
|
||||
|
||||
PreservedAnalyses
|
||||
ExpandVectorPredicationPass::run(Function &F, FunctionAnalysisManager &AM) {
|
||||
const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
|
||||
CachingVPExpander VPExpander(F, TTI);
|
||||
if (!VPExpander.expandVectorPredication())
|
||||
return PreservedAnalyses::all();
|
||||
PreservedAnalyses PA;
|
||||
PA.preserveSet<CFGAnalyses>();
|
||||
return PA;
|
||||
}
|
|
@@ -864,11 +864,6 @@ void TargetPassConfig::addIRPasses() {
  if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining)
    addPass(createPartiallyInlineLibCallsPass());

  // Expand vector predication intrinsics into standard IR instructions.
  // This pass has to run before ScalarizeMaskedMemIntrin and ExpandReduction
  // passes since it emits those kinds of intrinsics.
  addPass(createExpandVectorPredicationPass());

  // Add scalarization of target's unsupported masked memory intrinsics pass.
  // the unsupported intrinsic will be replaced with a chain of basic blocks,
  // that stores/loads element one-by-one if the appropriate mask bit is set.
@@ -279,11 +279,6 @@ Value *VPIntrinsic::getMaskParam() const {
  return nullptr;
}

void VPIntrinsic::setMaskParam(Value *NewMask) {
  auto MaskPos = GetMaskParamPos(getIntrinsicID());
  setArgOperand(*MaskPos, NewMask);
}

Value *VPIntrinsic::getVectorLengthParam() const {
  auto vlenPos = GetVectorLengthParamPos(getIntrinsicID());
  if (vlenPos)
@@ -291,11 +286,6 @@ Value *VPIntrinsic::getVectorLengthParam() const {
  return nullptr;
}

void VPIntrinsic::setVectorLengthParam(Value *NewEVL) {
  auto EVLPos = GetVectorLengthParamPos(getIntrinsicID());
  setArgOperand(*EVLPos, NewEVL);
}

Optional<int> VPIntrinsic::GetMaskParamPos(Intrinsic::ID IntrinsicID) {
  switch (IntrinsicID) {
  default:
@@ -21,7 +21,6 @@
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Lower constant intrinsics
; CHECK-NEXT: Remove unreachable blocks from the CFG
; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: AArch64 Stack Tagging
@@ -56,7 +56,6 @@
; CHECK-NEXT: Constant Hoisting
; CHECK-NEXT: Replace intrinsics with calls to vector library
; CHECK-NEXT: Partially inline calls to library functions
; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Stack Safety Analysis
@@ -37,7 +37,6 @@
; CHECK-NEXT: Constant Hoisting
; CHECK-NEXT: Replace intrinsics with calls to vector library
; CHECK-NEXT: Partially inline calls to library functions
; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Natural Loop Information
@@ -1,245 +0,0 @@
; Partial expansion cases (still VP with parameter expansions).
|
||||
; RUN: opt --expandvp --expandvp-override-evl-transform=Legal --expandvp-override-mask-transform=Legal -S < %s | FileCheck %s --check-prefix=LEGAL_LEGAL
|
||||
; RUN: opt --expandvp --expandvp-override-evl-transform=Discard --expandvp-override-mask-transform=Legal -S < %s | FileCheck %s --check-prefix=DISCARD_LEGAL
|
||||
; RUN: opt --expandvp --expandvp-override-evl-transform=Convert --expandvp-override-mask-transform=Legal -S < %s | FileCheck %s --check-prefix=CONVERT_LEGAL
|
||||
; Full expansion cases (all expanded to non-VP).
|
||||
; RUN: opt --expandvp --expandvp-override-evl-transform=Discard --expandvp-override-mask-transform=Convert -S < %s | FileCheck %s --check-prefix=ALL-CONVERT
|
||||
; RUN: opt --expandvp -S < %s | FileCheck %s --check-prefix=ALL-CONVERT
|
||||
; RUN: opt --expandvp --expandvp-override-evl-transform=Legal --expandvp-override-mask-transform=Convert -S < %s | FileCheck %s --check-prefix=ALL-CONVERT
|
||||
; RUN: opt --expandvp --expandvp-override-evl-transform=Convert --expandvp-override-mask-transform=Convert -S < %s | FileCheck %s --check-prefix=ALL-CONVERT
|
||||
|
||||
|
||||
; Fixed-width vectors
|
||||
; Integer arith
|
||||
declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
||||
declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
||||
declare <8 x i32> @llvm.vp.mul.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
||||
declare <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
||||
declare <8 x i32> @llvm.vp.srem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
||||
declare <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
||||
declare <8 x i32> @llvm.vp.urem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
||||
; Bit arith
|
||||
declare <8 x i32> @llvm.vp.and.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
||||
declare <8 x i32> @llvm.vp.xor.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
||||
declare <8 x i32> @llvm.vp.or.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
||||
declare <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
||||
declare <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
||||
declare <8 x i32> @llvm.vp.shl.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
||||
|
||||
; Fixed vector test function.
|
||||
define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
|
||||
%r0 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
%r1 = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
%r2 = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
%r3 = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
%r4 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
%r5 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
%r6 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
%r7 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
%r8 = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
%r9 = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
%rA = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
%rB = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
%rC = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Scalable-width vectors
|
||||
; Integer arith
|
||||
declare <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
||||
declare <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
||||
declare <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
||||
declare <vscale x 4 x i32> @llvm.vp.sdiv.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
||||
declare <vscale x 4 x i32> @llvm.vp.srem.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
||||
declare <vscale x 4 x i32> @llvm.vp.udiv.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
||||
declare <vscale x 4 x i32> @llvm.vp.urem.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
||||
; Bit arith
|
||||
declare <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
||||
declare <vscale x 4 x i32> @llvm.vp.xor.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
||||
declare <vscale x 4 x i32> @llvm.vp.or.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
||||
declare <vscale x 4 x i32> @llvm.vp.ashr.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
||||
declare <vscale x 4 x i32> @llvm.vp.lshr.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
||||
declare <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
||||
|
||||
; Scalable vector test function.
|
||||
define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i32> %i2, <vscale x 4 x i32> %f3, <vscale x 4 x i1> %m, i32 %n) {
|
||||
%r0 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
%r1 = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
%r2 = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
%r3 = call <vscale x 4 x i32> @llvm.vp.sdiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
%r4 = call <vscale x 4 x i32> @llvm.vp.srem.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
%r5 = call <vscale x 4 x i32> @llvm.vp.udiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
%r6 = call <vscale x 4 x i32> @llvm.vp.urem.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
%r7 = call <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
%r8 = call <vscale x 4 x i32> @llvm.vp.or.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
%r9 = call <vscale x 4 x i32> @llvm.vp.xor.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
%rA = call <vscale x 4 x i32> @llvm.vp.ashr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
%rB = call <vscale x 4 x i32> @llvm.vp.lshr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
%rC = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
ret void
|
||||
}
|
||||
; All VP intrinsics have to be lowered into non-VP ops
|
||||
; Convert %evl into %mask for non-speculatable VP intrinsics and emit the
|
||||
; instruction+select idiom with a non-VP SIMD instruction.
|
||||
;
|
||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.add}}
|
||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.sub}}
|
||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.mul}}
|
||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.sdiv}}
|
||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.srem}}
|
||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.udiv}}
|
||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.urem}}
|
||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.and}}
|
||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.or}}
|
||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.xor}}
|
||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.ashr}}
|
||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.lshr}}
|
||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.shl}}
|
||||
;
|
||||
; ALL-CONVERT: define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
|
||||
; ALL-CONVERT-NEXT: %{{.*}} = add <8 x i32> %i0, %i1
|
||||
; ALL-CONVERT-NEXT: %{{.*}} = sub <8 x i32> %i0, %i1
|
||||
; ALL-CONVERT-NEXT: %{{.*}} = mul <8 x i32> %i0, %i1
|
||||
; ALL-CONVERT-NEXT: [[NINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i32 0
|
||||
; ALL-CONVERT-NEXT: [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NINS]], <8 x i32> poison, <8 x i32> zeroinitializer
|
||||
; ALL-CONVERT-NEXT: [[EVLM:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT]]
|
||||
; ALL-CONVERT-NEXT: [[NEWM:%.+]] = and <8 x i1> [[EVLM]], %m
|
||||
; ALL-CONVERT-NEXT: [[SELONE:%.+]] = select <8 x i1> [[NEWM]], <8 x i32> %i1, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
; ALL-CONVERT-NEXT: %{{.+}} = sdiv <8 x i32> %i0, [[SELONE]]
|
||||
; ALL-CONVERT-NOT: %{{.+}} = srem <8 x i32> %i0, %i1
|
||||
; ALL-CONVERT: %{{.+}} = srem <8 x i32> %i0, %{{.+}}
|
||||
; ALL-CONVERT-NOT: %{{.+}} = udiv <8 x i32> %i0, %i1
|
||||
; ALL-CONVERT: %{{.+}} = udiv <8 x i32> %i0, %{{.+}}
|
||||
; ALL-CONVERT-NOT: %{{.+}} = urem <8 x i32> %i0, %i1
|
||||
; ALL-CONVERT: %{{.+}} = urem <8 x i32> %i0, %{{.+}}
|
||||
; ALL-CONVERT-NEXT: %{{.+}} = and <8 x i32> %i0, %i1
|
||||
; ALL-CONVERT-NEXT: %{{.+}} = or <8 x i32> %i0, %i1
|
||||
; ALL-CONVERT-NEXT: %{{.+}} = xor <8 x i32> %i0, %i1
|
||||
; ALL-CONVERT-NEXT: %{{.+}} = ashr <8 x i32> %i0, %i1
|
||||
; ALL-CONVERT-NEXT: %{{.+}} = lshr <8 x i32> %i0, %i1
|
||||
; ALL-CONVERT-NEXT: %{{.+}} = shl <8 x i32> %i0, %i1
|
||||
; ALL-CONVERT: ret void
|
||||
|
||||
|
||||
|
||||
|
||||
; All legal - don't transform anything.
|
||||
|
||||
; LEGAL_LEGAL: define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
|
||||
; LEGAL_LEGAL-NEXT: %r0 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %r1 = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %r2 = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %r3 = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %r4 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %r5 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %r6 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %r7 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %r8 = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %r9 = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %rA = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %rB = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %rC = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: ret void
|
||||
|
||||
; LEGAL_LEGAL:define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i32> %i2, <vscale x 4 x i32> %f3, <vscale x 4 x i1> %m, i32 %n) {
|
||||
; LEGAL_LEGAL-NEXT: %r0 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %r1 = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %r2 = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %r3 = call <vscale x 4 x i32> @llvm.vp.sdiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %r4 = call <vscale x 4 x i32> @llvm.vp.srem.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %r5 = call <vscale x 4 x i32> @llvm.vp.udiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %r6 = call <vscale x 4 x i32> @llvm.vp.urem.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %r7 = call <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %r8 = call <vscale x 4 x i32> @llvm.vp.or.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %r9 = call <vscale x 4 x i32> @llvm.vp.xor.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %rA = call <vscale x 4 x i32> @llvm.vp.ashr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %rB = call <vscale x 4 x i32> @llvm.vp.lshr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: %rC = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
||||
; LEGAL_LEGAL-NEXT: ret void
|
||||
|
||||
|
||||
; Drop %evl where possible else fold %evl into %mask (%evl Discard, %mask Legal)
|
||||
;
|
||||
; There is no caching yet in the ExpandVectorPredication pass and the %evl
|
||||
; expansion code is emitted for every non-speculatable intrinsic again. Hence,
|
||||
; only check that..
|
||||
; (1) The %evl folding code and %mask are correct for the first
|
||||
; non-speculatable VP intrinsic.
|
||||
; (2) All other non-speculatable VP intrinsics have a modified mask argument.
|
||||
; (3) All speculatable VP intrinsics keep their %mask and %evl.
|
||||
; (4) All VP intrinsics have an ineffective %evl parameter.
|
||||
|
||||
; DISCARD_LEGAL: define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
|
||||
; DISCARD_LEGAL-NEXT: %r0 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; DISCARD_LEGAL-NEXT: %r1 = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; DISCARD_LEGAL-NEXT: %r2 = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; DISCARD_LEGAL-NEXT: [[NSPLATINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i32 0
|
||||
; DISCARD_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NSPLATINS]], <8 x i32> poison, <8 x i32> zeroinitializer
|
||||
; DISCARD_LEGAL-NEXT: [[EVLMASK:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT]]
|
||||
; DISCARD_LEGAL-NEXT: [[NEWMASK:%.+]] = and <8 x i1> [[EVLMASK]], %m
|
||||
; DISCARD_LEGAL-NEXT: %r3 = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> [[NEWMASK]], i32 8)
|
||||
; DISCARD_LEGAL-NOT: %r4 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; DISCARD_LEGAL-NOT: %r5 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; DISCARD_LEGAL-NOT: %r6 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; DISCARD_LEGAL: %r7 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; DISCARD_LEGAL-NEXT: %r8 = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; DISCARD_LEGAL-NEXT: %r9 = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; DISCARD_LEGAL-NEXT: %rA = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; DISCARD_LEGAL-NEXT: %rB = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; DISCARD_LEGAL-NEXT: %rC = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; DISCARD_LEGAL-NEXT: ret void
|
||||
|
||||
; TODO compute vscale only once and use caching.
|
||||
; In the meantime, we only check for the correct vscale code for the first VP
|
||||
; intrinsic and skip over it for all others.
|
||||
|
||||
; DISCARD_LEGAL: define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i32> %i2, <vscale x 4 x i32> %f3, <vscale x 4 x i1> %m, i32 %n) {
|
||||
; DISCARD_LEGAL-NEXT: %vscale = call i32 @llvm.vscale.i32()
|
||||
; DISCARD_LEGAL-NEXT: %scalable_size = mul nuw i32 %vscale, 4
|
||||
; DISCARD_LEGAL-NEXT: %r0 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %scalable_size)
|
||||
; DISCARD_LEGAL: %r1 = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %scalable_size{{.*}})
|
||||
; DISCARD_LEGAL: %r2 = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %scalable_size{{.*}})
|
||||
; DISCARD_LEGAL: [[EVLM:%.+]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 %n)
|
||||
; DISCARD_LEGAL: [[NEWM:%.+]] = and <vscale x 4 x i1> [[EVLM]], %m
|
||||
; DISCARD_LEGAL: %r3 = call <vscale x 4 x i32> @llvm.vp.sdiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> [[NEWM]], i32 %scalable_size{{.*}})
|
||||
; DISCARD_LEGAL-NOT: %{{.+}} = call <vscale x 4 x i32> @llvm.vp.{{.*}}, i32 %n)
|
||||
; DISCARD_LEGAL: ret void
|
||||
|
||||
|
||||
; Convert %evl into %mask everywhere (%evl Convert, %mask Legal)
|
||||
;
|
||||
; For the same reasons as in the (%evl Discard, %mask Legal) case only check that..
|
||||
; (1) The %evl folding code and %mask are correct for the first VP intrinsic.
|
||||
; (2) All other VP intrinsics have a modified mask argument.
|
||||
; (3) All VP intrinsics have an ineffective %evl parameter.
|
||||
;
|
||||
; CONVERT_LEGAL: define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
|
||||
; CONVERT_LEGAL-NEXT: [[NINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i32 0
|
||||
; CONVERT_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NINS]], <8 x i32> poison, <8 x i32> zeroinitializer
|
||||
; CONVERT_LEGAL-NEXT: [[EVLM:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT]]
|
||||
; CONVERT_LEGAL-NEXT: [[NEWM:%.+]] = and <8 x i1> [[EVLM]], %m
|
||||
; CONVERT_LEGAL-NEXT: %{{.+}} = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> [[NEWM]], i32 8)
|
||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
||||
; CONVERT_LEGAL: ret void
|
||||
|
||||
; Similar to %evl discard, %mask legal but make sure the first VP intrinsic has a legal expansion
|
||||
; CONVERT_LEGAL: define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i32> %i2, <vscale x 4 x i32> %f3, <vscale x 4 x i1> %m, i32 %n) {
|
||||
; CONVERT_LEGAL-NEXT: [[EVLM:%.+]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 %n)
|
||||
; CONVERT_LEGAL-NEXT: [[NEWM:%.+]] = and <vscale x 4 x i1> [[EVLM]], %m
|
||||
; CONVERT_LEGAL-NEXT: %vscale = call i32 @llvm.vscale.i32()
|
||||
; CONVERT_LEGAL-NEXT: %scalable_size = mul nuw i32 %vscale, 4
|
||||
; CONVERT_LEGAL-NEXT: %r0 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> [[NEWM]], i32 %scalable_size)
|
||||
; CONVERT_LEGAL-NOT: %{{.*}} = call <vscale x 4 x i32> @llvm.vp.{{.*}}, i32 %n)
|
||||
; CONVERT_LEGAL: ret void
|
||||
|
|
@@ -25,7 +25,6 @@
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Lower constant intrinsics
; CHECK-NEXT: Remove unreachable blocks from the CFG
; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Expand indirectbr instructions
@@ -54,7 +54,6 @@
; CHECK-NEXT: Constant Hoisting
; CHECK-NEXT: Replace intrinsics with calls to vector library
; CHECK-NEXT: Partially inline calls to library functions
; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Interleaved Access Pass
@@ -352,7 +352,6 @@ int main(int argc, char **argv) {
  initializeVectorization(*Registry);
  initializeScalarizeMaskedMemIntrinLegacyPassPass(*Registry);
  initializeExpandReductionsPass(*Registry);
  initializeExpandVectorPredicationPass(*Registry);
  initializeHardwareLoopsPass(*Registry);
  initializeTransformUtils(*Registry);
  initializeReplaceWithVeclibLegacyPass(*Registry);
@@ -513,7 +513,7 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
      "safe-stack", "cost-model",
      "codegenprepare", "interleaved-load-combine",
      "unreachableblockelim", "verify-safepoint-ir",
      "atomic-expand", "expandvp",
      "atomic-expand",
      "hardware-loops", "type-promotion",
      "mve-tail-predication", "interleaved-access",
      "global-merge", "pre-isel-intrinsic-lowering",
@@ -591,7 +591,6 @@ int main(int argc, char **argv) {
  initializePostInlineEntryExitInstrumenterPass(Registry);
  initializeUnreachableBlockElimLegacyPassPass(Registry);
  initializeExpandReductionsPass(Registry);
  initializeExpandVectorPredicationPass(Registry);
  initializeWasmEHPreparePass(Registry);
  initializeWriteBitcodePassPass(Registry);
  initializeHardwareLoopsPass(Registry);