Revert "[VP,Integer,#2] ExpandVectorPredication pass"

This reverts commit 43bc584dc0.

The commit broke the -DLLVM_ENABLE_MODULES=1 builds.

http://green.lab.llvm.org/green/view/LLDB/job/lldb-cmake/31603/consoleFull#2136199809a1ca8a51-895e-46c6-af87-ce24fa4cd561
This commit is contained in:
Adrian Prantl 2021-04-30 16:28:44 -07:00
parent b11e4c9907
commit 02c5ba8679
21 changed files with 1 additions and 823 deletions

View File

@ -61,7 +61,6 @@ class TargetLibraryInfo;
class Type; class Type;
class User; class User;
class Value; class Value;
class VPIntrinsic;
struct KnownBits; struct KnownBits;
template <typename T> class Optional; template <typename T> class Optional;
@ -1380,38 +1379,6 @@ public:
/// Intrinsics") Use of %evl is discouraged when that is not the case. /// Intrinsics") Use of %evl is discouraged when that is not the case.
bool hasActiveVectorLength() const; bool hasActiveVectorLength() const;
/// Describes how a vector-predicated operation should be legalized: one
/// choice for its EVL parameter and one for the operator itself.
struct VPLegalization {
  /// The available transformation choices.
  enum VPTransform {
    Legal = 0,   ///< Keep the predicating parameter.
    Discard = 1, ///< Where legal, discard the predicate parameter.
    Convert = 2  ///< Transform into something else that is also predicating.
  };

  /// Treatment of the EVL parameter.
  ///   Legal:   leave the EVL parameter as it is.
  ///   Discard: ignore the EVL parameter where it is safe to do so.
  ///   Convert: fold the EVL into the mask parameter.
  VPTransform EVLParamStrategy;

  /// Treatment of the operator.
  ///   Legal:   the target supports this operator.
  ///   Convert: turn this into a non-VP operation.
  /// 'Discard' is not a valid choice here.
  VPTransform OpStrategy;

  VPLegalization(VPTransform EVLStrategy, VPTransform OperatorStrategy)
      : EVLParamStrategy(EVLStrategy), OpStrategy(OperatorStrategy) {}

  /// \returns true when neither the EVL parameter nor the operator needs to
  /// be transformed.
  bool shouldDoNothing() const {
    const bool NeedsEVLWork = EVLParamStrategy != Legal;
    const bool NeedsOpWork = OpStrategy != Legal;
    return !(NeedsEVLWork || NeedsOpWork);
  }
};
/// \returns How the target needs this vector-predicated operation to be
/// transformed.
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
/// @} /// @}
/// @} /// @}
@ -1721,8 +1688,6 @@ public:
virtual bool supportsScalableVectors() const = 0; virtual bool supportsScalableVectors() const = 0;
virtual bool hasActiveVectorLength() const = 0; virtual bool hasActiveVectorLength() const = 0;
virtual InstructionCost getInstructionLatency(const Instruction *I) = 0; virtual InstructionCost getInstructionLatency(const Instruction *I) = 0;
virtual VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
}; };
template <typename T> template <typename T>
@ -2294,11 +2259,6 @@ public:
InstructionCost getInstructionLatency(const Instruction *I) override { InstructionCost getInstructionLatency(const Instruction *I) override {
return Impl.getInstructionLatency(I); return Impl.getInstructionLatency(I);
} }
// Forward the VP legalization query for PI to the wrapped implementation
// object (Impl) and return its answer unchanged.
VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
return Impl.getVPLegalizationStrategy(PI);
}
}; };
template <typename T> template <typename T>

View File

@ -750,13 +750,6 @@ public:
bool hasActiveVectorLength() const { return false; } bool hasActiveVectorLength() const { return false; }
/// Default VP legalization policy: discard the EVL parameter and convert the
/// operator. The intrinsic \p PI itself is not inspected.
TargetTransformInfo::VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const {
  using VPLegal = TargetTransformInfo::VPLegalization;
  const VPLegal::VPTransform EVLParamStrategy = VPLegal::Discard;
  const VPLegal::VPTransform OperatorStrategy = VPLegal::Convert;
  return VPLegal(EVLParamStrategy, OperatorStrategy);
}
protected: protected:
// Obtain the minimum required size to hold the value (without the sign) // Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element. // In case of a vector it returns the min required size for one element.

View File

@ -1,23 +0,0 @@
//===-- ExpandVectorPredication.h - Expand vector predication ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_EXPANDVECTORPREDICATION_H
#define LLVM_CODEGEN_EXPANDVECTORPREDICATION_H
#include "llvm/IR/PassManager.h"
namespace llvm {
/// Function pass (PassInfoMixin style) for the vector predication expansion.
class ExpandVectorPredicationPass
: public PassInfoMixin<ExpandVectorPredicationPass> {
public:
/// Run the expansion on \p F, using \p AM to obtain required analyses.
/// \returns the set of analyses that are still valid afterwards.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
} // end namespace llvm
#endif // LLVM_CODEGEN_EXPANDVECTORPREDICATION_H

View File

@ -103,7 +103,6 @@ MACHINE_FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (
#define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) #define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)
#endif #endif
DUMMY_FUNCTION_PASS("expandmemcmp", ExpandMemCmpPass, ()) DUMMY_FUNCTION_PASS("expandmemcmp", ExpandMemCmpPass, ())
DUMMY_FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ())
DUMMY_FUNCTION_PASS("gc-lowering", GCLoweringPass, ()) DUMMY_FUNCTION_PASS("gc-lowering", GCLoweringPass, ())
DUMMY_FUNCTION_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass, ()) DUMMY_FUNCTION_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass, ())
DUMMY_FUNCTION_PASS("sjljehprepare", SjLjEHPreparePass, ()) DUMMY_FUNCTION_PASS("sjljehprepare", SjLjEHPreparePass, ())

View File

@ -453,11 +453,6 @@ namespace llvm {
// the corresponding function in a vector library (e.g., SVML, libmvec). // the corresponding function in a vector library (e.g., SVML, libmvec).
FunctionPass *createReplaceWithVeclibLegacyPass(); FunctionPass *createReplaceWithVeclibLegacyPass();
/// This pass expands the vector predication intrinsics into unpredicated
/// instructions with selects or just the explicit vector length into the
/// predicate mask.
FunctionPass *createExpandVectorPredicationPass();
// This pass expands memcmp() to load/stores. // This pass expands memcmp() to load/stores.
FunctionPass *createExpandMemCmpPass(); FunctionPass *createExpandMemCmpPass();

View File

@ -400,11 +400,9 @@ public:
/// \return the mask parameter or nullptr. /// \return the mask parameter or nullptr.
Value *getMaskParam() const; Value *getMaskParam() const;
void setMaskParam(Value *);
/// \return the vector length parameter or nullptr. /// \return the vector length parameter or nullptr.
Value *getVectorLengthParam() const; Value *getVectorLengthParam() const;
void setVectorLengthParam(Value *);
/// \return whether the vector length param can be ignored. /// \return whether the vector length param can be ignored.
bool canIgnoreVectorLengthParam() const; bool canIgnoreVectorLengthParam() const;

View File

@ -154,7 +154,6 @@ void initializeEntryExitInstrumenterPass(PassRegistry&);
void initializeExpandMemCmpPassPass(PassRegistry&); void initializeExpandMemCmpPassPass(PassRegistry&);
void initializeExpandPostRAPass(PassRegistry&); void initializeExpandPostRAPass(PassRegistry&);
void initializeExpandReductionsPass(PassRegistry&); void initializeExpandReductionsPass(PassRegistry&);
void initializeExpandVectorPredicationPass(PassRegistry &);
void initializeMakeGuardsExplicitLegacyPassPass(PassRegistry&); void initializeMakeGuardsExplicitLegacyPassPass(PassRegistry&);
void initializeExternalAAWrapperPassPass(PassRegistry&); void initializeExternalAAWrapperPassPass(PassRegistry&);
void initializeFEntryInserterPass(PassRegistry&); void initializeFEntryInserterPass(PassRegistry&);

View File

@ -197,7 +197,6 @@ namespace {
(void) llvm::createMergeFunctionsPass(); (void) llvm::createMergeFunctionsPass();
(void) llvm::createMergeICmpsLegacyPass(); (void) llvm::createMergeICmpsLegacyPass();
(void) llvm::createExpandMemCmpPass(); (void) llvm::createExpandMemCmpPass();
(void) llvm::createExpandVectorPredicationPass();
std::string buf; std::string buf;
llvm::raw_string_ostream os(buf); llvm::raw_string_ostream os(buf);
(void) llvm::createPrintModulePass(os); (void) llvm::createPrintModulePass(os);

View File

@ -1026,11 +1026,6 @@ bool TargetTransformInfo::preferPredicatedReductionSelect(
return TTIImpl->preferPredicatedReductionSelect(Opcode, Ty, Flags); return TTIImpl->preferPredicatedReductionSelect(Opcode, Ty, Flags);
} }
// Forwards the VP legalization query for VPI to the implementation object
// held in TTIImpl.
TargetTransformInfo::VPLegalization
TargetTransformInfo::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
return TTIImpl->getVPLegalizationStrategy(VPI);
}
bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const { bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
return TTIImpl->shouldExpandReduction(II); return TTIImpl->shouldExpandReduction(II);
} }

View File

@ -29,7 +29,6 @@ add_llvm_component_library(LLVMCodeGen
ExpandMemCmp.cpp ExpandMemCmp.cpp
ExpandPostRAPseudos.cpp ExpandPostRAPseudos.cpp
ExpandReductions.cpp ExpandReductions.cpp
ExpandVectorPredication.cpp
FaultMaps.cpp FaultMaps.cpp
FEntryInserter.cpp FEntryInserter.cpp
FinalizeISel.cpp FinalizeISel.cpp

View File

@ -1,469 +0,0 @@
//===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements IR expansion for vector predication intrinsics, allowing
// targets to enable vector predication until just before codegen.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ExpandVectorPredication.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
using VPLegalization = TargetTransformInfo::VPLegalization;
using VPTransform = TargetTransformInfo::VPLegalization::VPTransform;
// Keep this in sync with TargetTransformInfo::VPLegalization.
// X-macro listing the VPTransform enumerator names. It is expanded twice in
// this file with different definitions of VPINTERNAL_CASE: once to build the
// option help text and once to build the string-to-enum parser.
#define VPINTERNAL_VPLEGAL_CASES \
VPINTERNAL_CASE(Legal) \
VPINTERNAL_CASE(Discard) \
VPINTERNAL_CASE(Convert)
// First expansion: turn every case into the string "|<Name>" so that the list
// can be pasted into the option descriptions below.
#define VPINTERNAL_CASE(X) "|" #X
// Override options.
// Testing-only override of the strategy used for the %evl parameter. The
// default (empty string) means "no override".
static cl::opt<std::string> EVLTransformOverride(
"expandvp-override-evl-transform", cl::init(""), cl::Hidden,
cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
". If non-empty, ignore "
"TargetTransformInfo and "
"always use this transformation for the %evl parameter (Used in "
"testing)."));
// Testing-only override described as applying to the %mask parameter. Note
// that CachingVPExpander::getVPLegalizationStrategy assigns the parsed value
// to VPLegalization::OpStrategy. The default (empty string) means "no
// override".
static cl::opt<std::string> MaskTransformOverride(
"expandvp-override-mask-transform", cl::init(""), cl::Hidden,
cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
". If non-empty, Ignore "
"TargetTransformInfo and "
"always use this transformation for the %mask parameter (Used in "
"testing)."));
#undef VPINTERNAL_CASE
// Second expansion: turn every case into a StringSwitch ".Case" clause that
// maps the enumerator's name to its VPLegalization value.
#define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X)
// Map the textual option value \p TextOpt ("Legal", "Discard" or "Convert")
// to the corresponding VPTransform.
// NOTE(review): the switch has no .Default clause, so \p TextOpt is expected
// to be exactly one of the listed names; an empty or unknown string is not
// handled here - confirm how StringSwitch treats an unmatched input.
static VPTransform parseOverrideOption(const std::string &TextOpt) {
return StringSwitch<VPTransform>(TextOpt) VPINTERNAL_VPLEGAL_CASES;
}
#undef VPINTERNAL_VPLEGAL_CASES
// \returns true if at least one of the two override options holds a
// non-empty string.
static bool anyExpandVPOverridesSet() {
  const bool NoEVLOverride = EVLTransformOverride.empty();
  const bool NoMaskOverride = MaskTransformOverride.empty();
  return !(NoEVLOverride && NoMaskOverride);
}
#define DEBUG_TYPE "expandvp"
// Incremented in expandVectorPredication() for each job whose EVL strategy is
// 'Convert' and for which foldEVLIntoMask() returned a non-null value.
STATISTIC(NumFoldedVL, "Number of folded vector length params");
// Incremented in expandVectorPredication() for each job whose operator
// strategy is 'Convert', i.e. each call to expandPredication().
STATISTIC(NumLoweredVPOps, "Number of folded vector predication operations");
///// Helpers {
/// \returns true only if \p MaskVal is a ConstantVector for which
/// isAllOnesValue() holds; any other kind of value yields false.
static bool isAllTrueMask(Value *MaskVal) {
  if (auto *ConstMask = dyn_cast<ConstantVector>(MaskVal))
    return ConstMask->isAllOnesValue();
  return false;
}
/// \returns The constant one of type \p DivTy, used as a divisor that cannot
/// trap. \p DivTy must be an integer or integer-vector type.
static Constant *getSafeDivisor(Type *DivTy) {
  assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type");
  const uint64_t One = 1u;
  return ConstantInt::get(DivTy, One, /*isSigned=*/false);
}
/// Copy the fast-math flags of \p VPI onto \p NewVal.
/// Nothing happens unless \p NewVal is an instruction that is also an
/// FPMathOperator and \p VPI is an FPMathOperator itself.
static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) {
  auto *Replacement = dyn_cast<Instruction>(&NewVal);
  const bool CanCarryFMF = Replacement && isa<FPMathOperator>(NewVal);
  if (!CanCarryFMF)
    return;

  if (auto *SourceFPOp = dyn_cast<FPMathOperator>(&VPI))
    Replacement->setFastMathFlags(SourceFPOp->getFastMathFlags());
}
/// Replace the VP intrinsic \p OldOp with \p NewOp.
/// The decorations handled by transferDecorations() are copied first, then
/// every use of \p OldOp is redirected to \p NewOp, and finally \p OldOp is
/// erased from its parent. \p OldOp must not be used after this call.
static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
transferDecorations(NewOp, OldOp);
OldOp.replaceAllUsesWith(&NewOp);
OldOp.eraseFromParent();
}
//// } Helpers
namespace {
// Expansion pass state at function scope.
// Bundles the function being transformed with the TargetTransformInfo that is
// queried for legalization strategies, plus the helpers that rewrite
// individual VP intrinsics.
struct CachingVPExpander {
// The function whose VP intrinsics are expanded.
Function &F;
// Queried by getVPLegalizationStrategy() for the per-intrinsic strategy.
const TargetTransformInfo &TTI;
/// \returns A (fixed length) vector with ascending integer indices
/// (<0, 1, ..., NumElems-1>).
/// \p Builder
/// Used for instruction creation.
/// \p LaneTy
/// Integer element type of the result vector.
/// \p NumElems
/// Number of vector elements.
Value *createStepVector(IRBuilder<> &Builder, Type *LaneTy,
unsigned NumElems);
/// \returns A bitmask that is true where the lane position is less-than \p
/// EVLParam
///
/// \p Builder
/// Used for instruction creation.
/// \p EVLParam
/// The explicit vector length parameter to test against the lane
/// positions.
/// \p ElemCount
/// Static (potentially scalable) number of vector elements.
Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam,
ElementCount ElemCount);
/// Fold the %evl parameter of \p VPI into its mask parameter.
/// \returns \p VPI (the intrinsic is modified in place).
Value *foldEVLIntoMask(VPIntrinsic &VPI);
/// "Remove" the %evl parameter of \p PI by setting it to the static vector
/// length of the operation.
void discardEVLParameter(VPIntrinsic &PI);
/// \brief Lower this VP binary operator to a unpredicated binary operator.
Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
VPIntrinsic &PI);
/// \brief Lower \p PI to an unpredicated operation. Only binary operators
/// are lowered; any other intrinsic is returned unchanged.
Value *expandPredication(VPIntrinsic &PI);
/// \brief Determine how and whether the VPIntrinsic \p VPI shall be
/// expanded. This overrides TTI with the cl::opts listed at the top of this
/// file.
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const;
// True if any override option was set when this object was constructed.
bool UsingTTIOverrides;
public:
CachingVPExpander(Function &F, const TargetTransformInfo &TTI)
: F(F), TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {}
/// Expand the VP intrinsics in F. \returns true if any intrinsic was put on
/// the transformation worklist.
bool expandVectorPredication();
};
//// CachingVPExpander {
/// Build the constant vector <0, 1, ..., NumElems-1> with lanes of type
/// \p LaneTy. \p Builder is not used by the current implementation.
Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy,
                                           unsigned NumElems) {
  // TODO add caching
  SmallVector<Constant *, 16> Lanes;
  unsigned Lane = 0;
  while (Lane < NumElems) {
    Lanes.push_back(ConstantInt::get(LaneTy, Lane, false));
    ++Lane;
  }
  return ConstantVector::get(Lanes);
}
/// Turn the explicit vector length \p EVLParam into an equivalent lane mask
/// for a vector with \p ElemCount elements, emitting code through \p Builder.
Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
                                           Value *EVLParam,
                                           ElementCount ElemCount) {
  // TODO add caching
  if (!ElemCount.isScalable()) {
    // Fixed vector %evl conversion: compare <0, 1, ...> against a splat of
    // %evl, lane by lane.
    const unsigned LaneCount = ElemCount.getFixedValue();
    Type *IndexTy = EVLParam->getType();
    Value *EVLSplat = Builder.CreateVectorSplat(LaneCount, EVLParam);
    Value *LaneIds = createStepVector(Builder, IndexTy, LaneCount);
    return Builder.CreateICmp(CmpInst::ICMP_ULT, LaneIds, EVLSplat);
  }

  // Scalable vector %evl conversion.
  Module *ParentModule = Builder.GetInsertBlock()->getModule();
  Type *MaskTy = VectorType::get(Builder.getInt1Ty(), ElemCount);
  Function *ActiveLaneMask = Intrinsic::getDeclaration(
      ParentModule, Intrinsic::get_active_lane_mask,
      {MaskTy, EVLParam->getType()});
  // `get_active_lane_mask` performs an implicit less-than comparison.
  Value *FirstLane = Builder.getInt32(0);
  return Builder.CreateCall(ActiveLaneMask, {FirstLane, EVLParam});
}
/// Replace the VP binary operator \p VPI with a plain binary operator built
/// through \p Builder. For integer division and remainder under a mask that
/// is not known to be all-true, the divisor is first blended with a safe
/// constant on the masked-off lanes.
/// \returns the new binary operator; \p VPI has been erased.
Value *
CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
                                                     VPIntrinsic &VPI) {
  assert((isSafeToSpeculativelyExecute(&VPI) ||
          VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  auto OC = static_cast<Instruction::BinaryOps>(VPI.getFunctionalOpcode());
  assert(Instruction::isBinaryOp(OC));

  Value *LHS = VPI.getOperand(0);
  Value *RHS = VPI.getOperand(1);
  Value *Mask = VPI.getMaskParam();

  // Only the division-like operators care about masked-off lanes: their 2nd
  // operand must not be zero. Every other opcode can ignore the predicate.
  const bool IsDivOrRem = OC == Instruction::UDiv || OC == Instruction::SDiv ||
                          OC == Instruction::URem || OC == Instruction::SRem;
  if (IsDivOrRem && Mask && !isAllTrueMask(Mask)) {
    // Blend in a safe divisor (1) on the masked-off lanes.
    Value *SafeDivisor = getSafeDivisor(VPI.getType());
    RHS = Builder.CreateSelect(Mask, RHS, SafeDivisor);
  }

  Value *Lowered = Builder.CreateBinOp(OC, LHS, RHS, VPI.getName());
  replaceOperation(*Lowered, VPI);
  return Lowered;
}
/// Neutralize the %evl parameter of \p VPI by replacing it with the static
/// vector length of the operation (vscale * minimum element count for
/// scalable vectors). Does nothing if the parameter can already be ignored
/// or if there is no %evl parameter.
void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");
  if (VPI.canIgnoreVectorLengthParam())
    return;
  if (!VPI.getVectorLengthParam())
    return;

  const ElementCount LaneCount = VPI.getStaticVectorLength();
  Type *Int32Ty = Type::getInt32Ty(VPI.getContext());
  Value *FullLength = nullptr;
  if (!LaneCount.isScalable()) {
    FullLength = ConstantInt::get(Int32Ty, LaneCount.getFixedValue(), false);
  } else {
    // TODO add caching
    Function *VScaleDecl =
        Intrinsic::getDeclaration(VPI.getModule(), Intrinsic::vscale, Int32Ty);
    // Emit the length computation right in front of the intrinsic.
    IRBuilder<> Builder(VPI.getParent(), VPI.getIterator());
    Value *MinLanes = Builder.getInt32(LaneCount.getKnownMinValue());
    Value *VScale = Builder.CreateCall(VScaleDecl, {}, "vscale");
    FullLength = Builder.CreateMul(VScale, MinLanes, "scalable_size",
                                   /*NUW*/ true, /*NSW*/ false);
  }
  VPI.setVectorLengthParam(FullLength);
}
/// Express the effect of the %evl parameter of \p VPI through its mask: the
/// mask becomes (lane < %evl) AND old mask, after which %evl is discarded.
/// \returns \p VPI, which is modified in place.
Value *CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n');

  // Ineffective %evl parameter and so nothing to do here.
  if (VPI.canIgnoreVectorLengthParam())
    return &VPI;

  // Only VP intrinsics can have an %evl parameter.
  Value *PrevMask = VPI.getMaskParam();
  Value *PrevEVL = VPI.getVectorLengthParam();
  assert(PrevMask && "no mask param to fold the vl param into");
  assert(PrevEVL && "no EVL param to fold away");

  LLVM_DEBUG(dbgs() << "OLD evl: " << *PrevEVL << '\n');
  LLVM_DEBUG(dbgs() << "OLD mask: " << *PrevMask << '\n');

  // Convert the %evl predication into vector mask predication.
  IRBuilder<> Builder(&VPI);
  Value *EVLAsMask =
      convertEVLToMask(Builder, PrevEVL, VPI.getStaticVectorLength());
  VPI.setMaskParam(Builder.CreateAnd(EVLAsMask, PrevMask));

  // Drop the %evl parameter.
  discardEVLParameter(VPI);
  assert(VPI.canIgnoreVectorLengthParam() &&
         "transformation did not render the evl param ineffective!");

  // Reassess the modified instruction.
  return &VPI;
}
/// Lower \p VPI to an unpredicated operation where that is implemented.
/// Binary operators are lowered to plain LLVM instructions; any other
/// intrinsic is returned untouched.
Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n');

  IRBuilder<> Builder(&VPI);

  // Try lowering to a LLVM instruction first.
  const unsigned Opcode = VPI.getFunctionalOpcode();
  if (!Instruction::isBinaryOp(Opcode))
    return &VPI;

  return expandPredicationInBinaryOperator(Builder, VPI);
}
//// } CachingVPExpander
struct TransformJob {
VPIntrinsic *PI;
TargetTransformInfo::VPLegalization Strategy;
TransformJob(VPIntrinsic *PI, TargetTransformInfo::VPLegalization InitStrat)
: PI(PI), Strategy(InitStrat) {}
bool isDone() const { return Strategy.shouldDoNothing(); }
};
/// Adjust \p LegalizeStrat so that it is sound for instruction \p I.
/// Only the EVL strategy is ever modified; the operator strategy is read but
/// left as is.
void sanitizeStrategy(Instruction &I, VPLegalization &LegalizeStrat) {
  const bool WillConvertOp =
      LegalizeStrat.OpStrategy == VPLegalization::Convert;

  if (!isSafeToSpeculativelyExecute(&I)) {
    // We have to preserve the predicating effect of %evl for this
    // non-speculatable VP intrinsic.
    // 1) Never discard %evl.
    // 2) If this VP intrinsic will be expanded to non-VP code, make sure that
    //    %evl gets folded into %mask.
    const bool WouldDiscardEVL =
        LegalizeStrat.EVLParamStrategy == VPLegalization::Discard;
    if (WouldDiscardEVL || WillConvertOp)
      LegalizeStrat.EVLParamStrategy = VPLegalization::Convert;
    return;
  }

  // Speculatable instructions do not strictly need predication.
  // Converting a speculatable VP intrinsic means dropping %mask and %evl, so
  // there is no need to expand %evl into the %mask only to ignore that code.
  if (WillConvertOp)
    LegalizeStrat.EVLParamStrategy = VPLegalization::Discard;
}
/// Determine the legalization strategy for \p VPI.
/// The strategy reported by TargetTransformInfo is the baseline. Each of the
/// testing override options replaces its part of the strategy only when that
/// option is non-empty, as described in the options' help text.
/// \returns the strategy to apply to \p VPI.
VPLegalization
CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
  auto VPStrat = TTI.getVPLegalizationStrategy(VPI);
  if (LLVM_LIKELY(!UsingTTIOverrides)) {
    // No overrides - we are in production.
    return VPStrat;
  }

  // Overrides set - we are in testing, the following does not need to be
  // efficient.
  // An option that was left empty must not be parsed: parseOverrideOption()
  // has no case for the empty string. Keep the TTI answer for that part.
  if (!EVLTransformOverride.empty())
    VPStrat.EVLParamStrategy = parseOverrideOption(EVLTransformOverride);
  if (!MaskTransformOverride.empty())
    VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride);
  return VPStrat;
}
/// \brief Expand llvm.vp.* intrinsics as requested by \p TTI.
bool CachingVPExpander::expandVectorPredication() {
SmallVector<TransformJob, 16> Worklist;
// Collect all VPIntrinsics that need expansion and determine their expansion
// strategy.
for (auto &I : instructions(F)) {
auto *VPI = dyn_cast<VPIntrinsic>(&I);
if (!VPI)
continue;
auto VPStrat = getVPLegalizationStrategy(*VPI);
sanitizeStrategy(I, VPStrat);
if (!VPStrat.shouldDoNothing())
Worklist.emplace_back(VPI, VPStrat);
}
if (Worklist.empty())
return false;
// Transform all VPIntrinsics on the worklist.
LLVM_DEBUG(dbgs() << "\n:::: Transforming " << Worklist.size()
<< " instructions ::::\n");
for (TransformJob Job : Worklist) {
// Transform the EVL parameter.
switch (Job.Strategy.EVLParamStrategy) {
case VPLegalization::Legal:
break;
case VPLegalization::Discard:
discardEVLParameter(*Job.PI);
break;
case VPLegalization::Convert:
if (foldEVLIntoMask(*Job.PI))
++NumFoldedVL;
break;
}
Job.Strategy.EVLParamStrategy = VPLegalization::Legal;
// Replace with a non-predicated operation.
switch (Job.Strategy.OpStrategy) {
case VPLegalization::Legal:
break;
case VPLegalization::Discard:
llvm_unreachable("Invalid strategy for operators.");
case VPLegalization::Convert:
expandPredication(*Job.PI);
++NumLoweredVPOps;
break;
}
Job.Strategy.OpStrategy = VPLegalization::Legal;
assert(Job.isDone() && "incomplete transformation");
}
return true;
}
class ExpandVectorPredication : public FunctionPass {
public:
static char ID;
ExpandVectorPredication() : FunctionPass(ID) {
initializeExpandVectorPredicationPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
CachingVPExpander VPExpander(F, *TTI);
return VPExpander.expandVectorPredication();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.setPreservesCFG();
}
};
} // namespace
// Storage for the pass identifier that is handed to the FunctionPass
// constructor.
char ExpandVectorPredication::ID;
// Register the pass under the command line name "expandvp".
INITIALIZE_PASS_BEGIN(ExpandVectorPredication, "expandvp",
"Expand vector predication intrinsics", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
// NOTE(review): getAnalysisUsage() only requires
// TargetTransformInfoWrapperPass; confirm whether this DominatorTree
// dependency is actually needed.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ExpandVectorPredication, "expandvp",
"Expand vector predication intrinsics", false, false)
// Factory for the ExpandVectorPredication FunctionPass.
// Returns a newly allocated pass object as a raw pointer.
FunctionPass *llvm::createExpandVectorPredicationPass() {
return new ExpandVectorPredication();
}
/// Run the VP expansion on \p F, with the TargetTransformInfo obtained from
/// \p AM. If nothing was transformed all analyses are preserved; otherwise
/// only the CFG analyses are.
PreservedAnalyses
ExpandVectorPredicationPass::run(Function &F, FunctionAnalysisManager &AM) {
  const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  CachingVPExpander VPExpander(F, TTI);
  const bool Changed = VPExpander.expandVectorPredication();
  if (Changed) {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    return PA;
  }
  return PreservedAnalyses::all();
}

View File

@ -864,11 +864,6 @@ void TargetPassConfig::addIRPasses() {
if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining) if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining)
addPass(createPartiallyInlineLibCallsPass()); addPass(createPartiallyInlineLibCallsPass());
// Expand vector predication intrinsics into standard IR instructions.
// This pass has to run before ScalarizeMaskedMemIntrin and ExpandReduction
// passes since it emits those kinds of intrinsics.
addPass(createExpandVectorPredicationPass());
// Add scalarization of target's unsupported masked memory intrinsics pass. // Add scalarization of target's unsupported masked memory intrinsics pass.
// the unsupported intrinsic will be replaced with a chain of basic blocks, // the unsupported intrinsic will be replaced with a chain of basic blocks,
// that stores/loads element one-by-one if the appropriate mask bit is set. // that stores/loads element one-by-one if the appropriate mask bit is set.

View File

@ -279,11 +279,6 @@ Value *VPIntrinsic::getMaskParam() const {
return nullptr; return nullptr;
} }
/// Overwrite the mask argument of this intrinsic with \p NewMask.
/// The position returned by GetMaskParamPos() is dereferenced unchecked, so
/// this intrinsic is expected to have a mask parameter.
void VPIntrinsic::setMaskParam(Value *NewMask) {
  const auto Position = GetMaskParamPos(getIntrinsicID());
  const auto ArgIndex = *Position;
  setArgOperand(ArgIndex, NewMask);
}
Value *VPIntrinsic::getVectorLengthParam() const { Value *VPIntrinsic::getVectorLengthParam() const {
auto vlenPos = GetVectorLengthParamPos(getIntrinsicID()); auto vlenPos = GetVectorLengthParamPos(getIntrinsicID());
if (vlenPos) if (vlenPos)
@ -291,11 +286,6 @@ Value *VPIntrinsic::getVectorLengthParam() const {
return nullptr; return nullptr;
} }
/// Overwrite the explicit vector length argument of this intrinsic with
/// \p NewEVL. The position returned by GetVectorLengthParamPos() is
/// dereferenced unchecked, so this intrinsic is expected to have an EVL
/// parameter.
void VPIntrinsic::setVectorLengthParam(Value *NewEVL) {
  const auto Position = GetVectorLengthParamPos(getIntrinsicID());
  const auto ArgIndex = *Position;
  setArgOperand(ArgIndex, NewEVL);
}
Optional<int> VPIntrinsic::GetMaskParamPos(Intrinsic::ID IntrinsicID) { Optional<int> VPIntrinsic::GetMaskParamPos(Intrinsic::ID IntrinsicID) {
switch (IntrinsicID) { switch (IntrinsicID) {
default: default:

View File

@ -21,7 +21,6 @@
; CHECK-NEXT: Shadow Stack GC Lowering ; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Lower constant intrinsics ; CHECK-NEXT: Lower constant intrinsics
; CHECK-NEXT: Remove unreachable blocks from the CFG ; CHECK-NEXT: Remove unreachable blocks from the CFG
; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Scalarize Masked Memory Intrinsics ; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics ; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: AArch64 Stack Tagging ; CHECK-NEXT: AArch64 Stack Tagging

View File

@ -56,7 +56,6 @@
; CHECK-NEXT: Constant Hoisting ; CHECK-NEXT: Constant Hoisting
; CHECK-NEXT: Replace intrinsics with calls to vector library ; CHECK-NEXT: Replace intrinsics with calls to vector library
; CHECK-NEXT: Partially inline calls to library functions ; CHECK-NEXT: Partially inline calls to library functions
; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Scalarize Masked Memory Intrinsics ; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics ; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Stack Safety Analysis ; CHECK-NEXT: Stack Safety Analysis

View File

@ -37,7 +37,6 @@
; CHECK-NEXT: Constant Hoisting ; CHECK-NEXT: Constant Hoisting
; CHECK-NEXT: Replace intrinsics with calls to vector library ; CHECK-NEXT: Replace intrinsics with calls to vector library
; CHECK-NEXT: Partially inline calls to library functions ; CHECK-NEXT: Partially inline calls to library functions
; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Scalarize Masked Memory Intrinsics ; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics ; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Natural Loop Information

View File

@ -1,245 +0,0 @@
; Partial expansion cases (still VP with parameter expansions).
; RUN: opt --expandvp --expandvp-override-evl-transform=Legal --expandvp-override-mask-transform=Legal -S < %s | FileCheck %s --check-prefix=LEGAL_LEGAL
; RUN: opt --expandvp --expandvp-override-evl-transform=Discard --expandvp-override-mask-transform=Legal -S < %s | FileCheck %s --check-prefix=DISCARD_LEGAL
; RUN: opt --expandvp --expandvp-override-evl-transform=Convert --expandvp-override-mask-transform=Legal -S < %s | FileCheck %s --check-prefix=CONVERT_LEGAL
; Full expansion cases (all expanded to non-VP).
; RUN: opt --expandvp --expandvp-override-evl-transform=Discard --expandvp-override-mask-transform=Convert -S < %s | FileCheck %s --check-prefix=ALL-CONVERT
; RUN: opt --expandvp -S < %s | FileCheck %s --check-prefix=ALL-CONVERT
; RUN: opt --expandvp --expandvp-override-evl-transform=Legal --expandvp-override-mask-transform=Convert -S < %s | FileCheck %s --check-prefix=ALL-CONVERT
; RUN: opt --expandvp --expandvp-override-evl-transform=Convert --expandvp-override-mask-transform=Convert -S < %s | FileCheck %s --check-prefix=ALL-CONVERT
; Fixed-width vectors
; Integer arith
declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.mul.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.srem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.urem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
; Bit arith
declare <8 x i32> @llvm.vp.and.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.xor.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.or.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
declare <8 x i32> @llvm.vp.shl.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
; Fixed vector test function.
; Calls each of the thirteen integer VP intrinsics declared above exactly once
; on <8 x i32> operands, always with operands %i0 and %i1, mask %m and explicit
; vector length %n. The results %r0..%rC are never used and the function
; returns void; the parameters %i2 and %f3 are not referenced in the body.
; The value names and the order of the calls are matched verbatim by the
; FileCheck directives further down in this file, so keep both stable.
define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
; Integer arithmetic: add, sub, mul, then the division and remainder ops.
%r0 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r1 = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r2 = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r3 = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r4 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r5 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r6 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
; Bitwise logic (and, or, xor) followed by the shifts (ashr, lshr, shl).
%r7 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r8 = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%r9 = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%rA = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%rB = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
%rC = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
ret void
}
; Scalable-width vectors
; Integer arith
declare <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i32> @llvm.vp.sdiv.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i32> @llvm.vp.srem.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i32> @llvm.vp.udiv.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i32> @llvm.vp.urem.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
; Bit arith
declare <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i32> @llvm.vp.xor.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i32> @llvm.vp.or.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i32> @llvm.vp.ashr.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i32> @llvm.vp.lshr.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
; Scalable vector test function.
; Scalable counterpart of the fixed-width test: calls each of the thirteen
; integer VP intrinsics declared above exactly once on <vscale x 4 x i32>
; operands, always with operands %i0 and %i1, mask %m and explicit vector
; length %n. The results %r0..%rC are never used and the function returns
; void; the parameters %i2 and %f3 are not referenced in the body.
; The value names and the order of the calls are matched verbatim by the
; FileCheck directives further down in this file, so keep both stable.
define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i32> %i2, <vscale x 4 x i32> %f3, <vscale x 4 x i1> %m, i32 %n) {
; Integer arithmetic: add, sub, mul, then the division and remainder ops.
%r0 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
%r1 = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
%r2 = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
%r3 = call <vscale x 4 x i32> @llvm.vp.sdiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
%r4 = call <vscale x 4 x i32> @llvm.vp.srem.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
%r5 = call <vscale x 4 x i32> @llvm.vp.udiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
%r6 = call <vscale x 4 x i32> @llvm.vp.urem.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
; Bitwise logic (and, or, xor) followed by the shifts (ashr, lshr, shl).
%r7 = call <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
%r8 = call <vscale x 4 x i32> @llvm.vp.or.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
%r9 = call <vscale x 4 x i32> @llvm.vp.xor.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
%rA = call <vscale x 4 x i32> @llvm.vp.ashr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
%rB = call <vscale x 4 x i32> @llvm.vp.lshr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
%rC = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
ret void
}
; All VP intrinsics have to be lowered into non-VP ops.
; Convert %evl into %mask for non-speculatable VP intrinsics and emit the
; instruction+select idiom with a non-VP SIMD instruction (the select
; substitutes a divisor of 1 in the masked-off lanes).
;
; ALL-CONVERT-NOT: {{call.* @llvm.vp.add}}
; ALL-CONVERT-NOT: {{call.* @llvm.vp.sub}}
; ALL-CONVERT-NOT: {{call.* @llvm.vp.mul}}
; ALL-CONVERT-NOT: {{call.* @llvm.vp.sdiv}}
; ALL-CONVERT-NOT: {{call.* @llvm.vp.srem}}
; ALL-CONVERT-NOT: {{call.* @llvm.vp.udiv}}
; ALL-CONVERT-NOT: {{call.* @llvm.vp.urem}}
; ALL-CONVERT-NOT: {{call.* @llvm.vp.and}}
; ALL-CONVERT-NOT: {{call.* @llvm.vp.or}}
; ALL-CONVERT-NOT: {{call.* @llvm.vp.xor}}
; ALL-CONVERT-NOT: {{call.* @llvm.vp.ashr}}
; ALL-CONVERT-NOT: {{call.* @llvm.vp.lshr}}
; ALL-CONVERT-NOT: {{call.* @llvm.vp.shl}}
;
; ALL-CONVERT: define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
; ALL-CONVERT-NEXT: %{{.*}} = add <8 x i32> %i0, %i1
; ALL-CONVERT-NEXT: %{{.*}} = sub <8 x i32> %i0, %i1
; ALL-CONVERT-NEXT: %{{.*}} = mul <8 x i32> %i0, %i1
; ALL-CONVERT-NEXT: [[NINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i32 0
; ALL-CONVERT-NEXT: [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NINS]], <8 x i32> poison, <8 x i32> zeroinitializer
; ALL-CONVERT-NEXT: [[EVLM:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT]]
; ALL-CONVERT-NEXT: [[NEWM:%.+]] = and <8 x i1> [[EVLM]], %m
; ALL-CONVERT-NEXT: [[SELONE:%.+]] = select <8 x i1> [[NEWM]], <8 x i32> %i1, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; ALL-CONVERT-NEXT: %{{.+}} = sdiv <8 x i32> %i0, [[SELONE]]
; ALL-CONVERT-NOT: %{{.+}} = srem <8 x i32> %i0, %i1
; ALL-CONVERT: %{{.+}} = srem <8 x i32> %i0, %{{.+}}
; ALL-CONVERT-NOT: %{{.+}} = udiv <8 x i32> %i0, %i1
; ALL-CONVERT: %{{.+}} = udiv <8 x i32> %i0, %{{.+}}
; ALL-CONVERT-NOT: %{{.+}} = urem <8 x i32> %i0, %i1
; ALL-CONVERT: %{{.+}} = urem <8 x i32> %i0, %{{.+}}
; ALL-CONVERT-NEXT: %{{.+}} = and <8 x i32> %i0, %i1
; ALL-CONVERT-NEXT: %{{.+}} = or <8 x i32> %i0, %i1
; ALL-CONVERT-NEXT: %{{.+}} = xor <8 x i32> %i0, %i1
; ALL-CONVERT-NEXT: %{{.+}} = ashr <8 x i32> %i0, %i1
; ALL-CONVERT-NEXT: %{{.+}} = lshr <8 x i32> %i0, %i1
; ALL-CONVERT-NEXT: %{{.+}} = shl <8 x i32> %i0, %i1
; ALL-CONVERT: ret void
; All legal - don't transform anything.
; LEGAL_LEGAL: define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
; LEGAL_LEGAL-NEXT: %r0 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %r1 = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %r2 = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %r3 = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %r4 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %r5 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %r6 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %r7 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %r8 = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %r9 = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %rA = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %rB = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %rC = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: ret void
; LEGAL_LEGAL:define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i32> %i2, <vscale x 4 x i32> %f3, <vscale x 4 x i1> %m, i32 %n) {
; LEGAL_LEGAL-NEXT: %r0 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %r1 = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %r2 = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %r3 = call <vscale x 4 x i32> @llvm.vp.sdiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %r4 = call <vscale x 4 x i32> @llvm.vp.srem.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %r5 = call <vscale x 4 x i32> @llvm.vp.udiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %r6 = call <vscale x 4 x i32> @llvm.vp.urem.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %r7 = call <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %r8 = call <vscale x 4 x i32> @llvm.vp.or.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %r9 = call <vscale x 4 x i32> @llvm.vp.xor.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %rA = call <vscale x 4 x i32> @llvm.vp.ashr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %rB = call <vscale x 4 x i32> @llvm.vp.lshr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: %rC = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
; LEGAL_LEGAL-NEXT: ret void
; Drop %evl where possible, otherwise fold %evl into %mask (%evl Discard, %mask Legal).
;
; There is no caching yet in the ExpandVectorPredication pass, so the %evl
; expansion code is emitted again for every non-speculatable intrinsic. Hence,
; only check that:
; (1) The %evl folding code and %mask are correct for the first
; non-speculatable VP intrinsic.
; (2) All other non-speculatable VP intrinsics have a modified mask argument.
; (3) All speculatable VP intrinsics keep their %mask; only their %evl is replaced (see (4)).
; (4) All VP intrinsics have an ineffective %evl parameter.
; DISCARD_LEGAL: define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
; DISCARD_LEGAL-NEXT: %r0 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; DISCARD_LEGAL-NEXT: %r1 = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; DISCARD_LEGAL-NEXT: %r2 = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; DISCARD_LEGAL-NEXT: [[NSPLATINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i32 0
; DISCARD_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NSPLATINS]], <8 x i32> poison, <8 x i32> zeroinitializer
; DISCARD_LEGAL-NEXT: [[EVLMASK:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT]]
; DISCARD_LEGAL-NEXT: [[NEWMASK:%.+]] = and <8 x i1> [[EVLMASK]], %m
; DISCARD_LEGAL-NEXT: %r3 = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> [[NEWMASK]], i32 8)
; DISCARD_LEGAL-NOT: %r4 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; DISCARD_LEGAL-NOT: %r5 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; DISCARD_LEGAL-NOT: %r6 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; DISCARD_LEGAL: %r7 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; DISCARD_LEGAL-NEXT: %r8 = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; DISCARD_LEGAL-NEXT: %r9 = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; DISCARD_LEGAL-NEXT: %rA = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; DISCARD_LEGAL-NEXT: %rB = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; DISCARD_LEGAL-NEXT: %rC = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; DISCARD_LEGAL-NEXT: ret void
; TODO: Compute vscale only once and cache the result.
; In the meantime, we only check for the correct vscale code for the first VP
; intrinsic and skip over it for all others.
; DISCARD_LEGAL: define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i32> %i2, <vscale x 4 x i32> %f3, <vscale x 4 x i1> %m, i32 %n) {
; DISCARD_LEGAL-NEXT: %vscale = call i32 @llvm.vscale.i32()
; DISCARD_LEGAL-NEXT: %scalable_size = mul nuw i32 %vscale, 4
; DISCARD_LEGAL-NEXT: %r0 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %scalable_size)
; DISCARD_LEGAL: %r1 = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %scalable_size{{.*}})
; DISCARD_LEGAL: %r2 = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %scalable_size{{.*}})
; DISCARD_LEGAL: [[EVLM:%.+]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 %n)
; DISCARD_LEGAL: [[NEWM:%.+]] = and <vscale x 4 x i1> [[EVLM]], %m
; DISCARD_LEGAL: %r3 = call <vscale x 4 x i32> @llvm.vp.sdiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> [[NEWM]], i32 %scalable_size{{.*}})
; DISCARD_LEGAL-NOT: %{{.+}} = call <vscale x 4 x i32> @llvm.vp.{{.*}}, i32 %n)
; DISCARD_LEGAL: ret void
; Convert %evl into %mask everywhere (%evl Convert, %mask Legal)
;
; For the same reasons as in the (%evl Discard, %mask Legal) case, only check that:
; (1) The %evl folding code and %mask are correct for the first VP intrinsic.
; (2) All other VP intrinsics have a modified mask argument.
; (3) All VP intrinsics have an ineffective %evl parameter.
;
; CONVERT_LEGAL: define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
; CONVERT_LEGAL-NEXT: [[NINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i32 0
; CONVERT_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NINS]], <8 x i32> poison, <8 x i32> zeroinitializer
; CONVERT_LEGAL-NEXT: [[EVLM:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT]]
; CONVERT_LEGAL-NEXT: [[NEWM:%.+]] = and <8 x i1> [[EVLM]], %m
; CONVERT_LEGAL-NEXT: %{{.+}} = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> [[NEWM]], i32 8)
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
; CONVERT_LEGAL: ret void
; Similar to the (%evl Discard, %mask Legal) case, but make sure the first VP intrinsic has a legal expansion.
; CONVERT_LEGAL: define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i32> %i2, <vscale x 4 x i32> %f3, <vscale x 4 x i1> %m, i32 %n) {
; CONVERT_LEGAL-NEXT: [[EVLM:%.+]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 %n)
; CONVERT_LEGAL-NEXT: [[NEWM:%.+]] = and <vscale x 4 x i1> [[EVLM]], %m
; CONVERT_LEGAL-NEXT: %vscale = call i32 @llvm.vscale.i32()
; CONVERT_LEGAL-NEXT: %scalable_size = mul nuw i32 %vscale, 4
; CONVERT_LEGAL-NEXT: %r0 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> [[NEWM]], i32 %scalable_size)
; CONVERT_LEGAL-NOT: %{{.*}} = call <vscale x 4 x i32> @llvm.vp.{{.*}}, i32 %n)
; CONVERT_LEGAL: ret void

View File

@ -25,7 +25,6 @@
; CHECK-NEXT: Shadow Stack GC Lowering ; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Lower constant intrinsics ; CHECK-NEXT: Lower constant intrinsics
; CHECK-NEXT: Remove unreachable blocks from the CFG ; CHECK-NEXT: Remove unreachable blocks from the CFG
; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Scalarize Masked Memory Intrinsics ; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics ; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Expand indirectbr instructions ; CHECK-NEXT: Expand indirectbr instructions

View File

@ -54,7 +54,6 @@
; CHECK-NEXT: Constant Hoisting ; CHECK-NEXT: Constant Hoisting
; CHECK-NEXT: Replace intrinsics with calls to vector library ; CHECK-NEXT: Replace intrinsics with calls to vector library
; CHECK-NEXT: Partially inline calls to library functions ; CHECK-NEXT: Partially inline calls to library functions
; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Scalarize Masked Memory Intrinsics ; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics ; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Interleaved Access Pass ; CHECK-NEXT: Interleaved Access Pass

View File

@ -352,7 +352,6 @@ int main(int argc, char **argv) {
initializeVectorization(*Registry); initializeVectorization(*Registry);
initializeScalarizeMaskedMemIntrinLegacyPassPass(*Registry); initializeScalarizeMaskedMemIntrinLegacyPassPass(*Registry);
initializeExpandReductionsPass(*Registry); initializeExpandReductionsPass(*Registry);
initializeExpandVectorPredicationPass(*Registry);
initializeHardwareLoopsPass(*Registry); initializeHardwareLoopsPass(*Registry);
initializeTransformUtils(*Registry); initializeTransformUtils(*Registry);
initializeReplaceWithVeclibLegacyPass(*Registry); initializeReplaceWithVeclibLegacyPass(*Registry);

View File

@ -513,7 +513,7 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
"safe-stack", "cost-model", "safe-stack", "cost-model",
"codegenprepare", "interleaved-load-combine", "codegenprepare", "interleaved-load-combine",
"unreachableblockelim", "verify-safepoint-ir", "unreachableblockelim", "verify-safepoint-ir",
"atomic-expand", "expandvp", "atomic-expand",
"hardware-loops", "type-promotion", "hardware-loops", "type-promotion",
"mve-tail-predication", "interleaved-access", "mve-tail-predication", "interleaved-access",
"global-merge", "pre-isel-intrinsic-lowering", "global-merge", "pre-isel-intrinsic-lowering",
@ -591,7 +591,6 @@ int main(int argc, char **argv) {
initializePostInlineEntryExitInstrumenterPass(Registry); initializePostInlineEntryExitInstrumenterPass(Registry);
initializeUnreachableBlockElimLegacyPassPass(Registry); initializeUnreachableBlockElimLegacyPassPass(Registry);
initializeExpandReductionsPass(Registry); initializeExpandReductionsPass(Registry);
initializeExpandVectorPredicationPass(Registry);
initializeWasmEHPreparePass(Registry); initializeWasmEHPreparePass(Registry);
initializeWriteBitcodePassPass(Registry); initializeWriteBitcodePassPass(Registry);
initializeHardwareLoopsPass(Registry); initializeHardwareLoopsPass(Registry);