Revert "[VP,Integer,#2] ExpandVectorPredication pass"
This reverts commit 43bc584dc0
.
The commit broke the -DLLVM_ENABLE_MODULES=1 builds.
http://green.lab.llvm.org/green/view/LLDB/job/lldb-cmake/31603/consoleFull#2136199809a1ca8a51-895e-46c6-af87-ce24fa4cd561
This commit is contained in:
parent
b11e4c9907
commit
02c5ba8679
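For context, the reverted pass rewrote llvm.vp.* intrinsics into unpredicated IR. A minimal sketch of its effect on a fixed-width llvm.vp.sdiv, derived from the test expectations removed further below (value names are illustrative):

  %n.ins   = insertelement <8 x i32> poison, i32 %n, i32 0
  %n.splat = shufflevector <8 x i32> %n.ins, <8 x i32> poison, <8 x i32> zeroinitializer
  %evl.m   = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, %n.splat
  %new.m   = and <8 x i1> %evl.m, %m
  %safe.d  = select <8 x i1> %new.m, <8 x i32> %i1, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r3      = sdiv <8 x i32> %i0, %safe.d

The %evl parameter is folded into the mask, and masked-off divisor lanes are replaced with a safe value (1) before the plain sdiv is emitted.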
@@ -61,7 +61,6 @@ class TargetLibraryInfo;
 class Type;
 class User;
 class Value;
-class VPIntrinsic;
 struct KnownBits;
 template <typename T> class Optional;
 
@@ -1380,38 +1379,6 @@ public:
   /// Intrinsics") Use of %evl is discouraged when that is not the case.
   bool hasActiveVectorLength() const;
 
-  struct VPLegalization {
-    enum VPTransform {
-      // keep the predicating parameter
-      Legal = 0,
-      // where legal, discard the predicate parameter
-      Discard = 1,
-      // transform into something else that is also predicating
-      Convert = 2
-    };
-
-    // How to transform the EVL parameter.
-    // Legal:   keep the EVL parameter as it is.
-    // Discard: Ignore the EVL parameter where it is safe to do so.
-    // Convert: Fold the EVL into the mask parameter.
-    VPTransform EVLParamStrategy;
-
-    // How to transform the operator.
-    // Legal:   The target supports this operator.
-    // Convert: Convert this to a non-VP operation.
-    // The 'Discard' strategy is invalid.
-    VPTransform OpStrategy;
-
-    bool shouldDoNothing() const {
-      return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
-    }
-    VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
-        : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
-  };
-
-  /// \returns How the target needs this vector-predicated operation to be
-  /// transformed.
-  VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
   /// @}
 
   /// @}
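The VPLegalization pair removed above is what a target's TTI implementation reports per intrinsic. A minimal sketch of a hypothetical target hook (the class name is illustrative and not part of this patch; the default implementation removed further below returns Discard/Convert):

TargetTransformInfo::VPLegalization
MyTargetTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const {
  using VPLegalization = TargetTransformInfo::VPLegalization;
  // A target with native %evl/%mask support keeps the intrinsic as-is.
  if (hasActiveVectorLength())
    return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
  // Otherwise request the generic expansion: drop or fold %evl, then
  // convert the operation to unpredicated IR.
  return VPLegalization(VPLegalization::Discard, VPLegalization::Convert);
}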
@@ -1721,8 +1688,6 @@ public:
   virtual bool supportsScalableVectors() const = 0;
   virtual bool hasActiveVectorLength() const = 0;
   virtual InstructionCost getInstructionLatency(const Instruction *I) = 0;
-  virtual VPLegalization
-  getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
 };
 
 template <typename T>
@@ -2294,11 +2259,6 @@ public:
   InstructionCost getInstructionLatency(const Instruction *I) override {
     return Impl.getInstructionLatency(I);
   }
-
-  VPLegalization
-  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
-    return Impl.getVPLegalizationStrategy(PI);
-  }
 };
 
 template <typename T>
@@ -750,13 +750,6 @@ public:
 
   bool hasActiveVectorLength() const { return false; }
 
-  TargetTransformInfo::VPLegalization
-  getVPLegalizationStrategy(const VPIntrinsic &PI) const {
-    return TargetTransformInfo::VPLegalization(
-        /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
-        /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
-  }
-
 protected:
   // Obtain the minimum required size to hold the value (without the sign)
   // In case of a vector it returns the min required size for one element.
@@ -1,23 +0,0 @@
-//===-- ExpandVectorPredication.h - Expand vector predication ---*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_EXPANDVECTORPREDICATION_H
-#define LLVM_CODEGEN_EXPANDVECTORPREDICATION_H
-
-#include "llvm/IR/PassManager.h"
-
-namespace llvm {
-
-class ExpandVectorPredicationPass
-    : public PassInfoMixin<ExpandVectorPredicationPass> {
-public:
-  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
-};
-} // end namespace llvm
-
-#endif // LLVM_CODEGEN_EXPANDVECTORPREDICATION_H
@@ -103,7 +103,6 @@ MACHINE_FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (
 #define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR)
 #endif
 DUMMY_FUNCTION_PASS("expandmemcmp", ExpandMemCmpPass, ())
-DUMMY_FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ())
 DUMMY_FUNCTION_PASS("gc-lowering", GCLoweringPass, ())
 DUMMY_FUNCTION_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass, ())
 DUMMY_FUNCTION_PASS("sjljehprepare", SjLjEHPreparePass, ())
@@ -453,11 +453,6 @@ namespace llvm {
 // the corresponding function in a vector library (e.g., SVML, libmvec).
 FunctionPass *createReplaceWithVeclibLegacyPass();
 
-/// This pass expands the vector predication intrinsics into unpredicated
-/// instructions with selects or just the explicit vector length into the
-/// predicate mask.
-FunctionPass *createExpandVectorPredicationPass();
-
 // This pass expands memcmp() to load/stores.
 FunctionPass *createExpandMemCmpPass();
 
@@ -400,11 +400,9 @@ public:
 
   /// \return the mask parameter or nullptr.
   Value *getMaskParam() const;
-  void setMaskParam(Value *);
 
   /// \return the vector length parameter or nullptr.
   Value *getVectorLengthParam() const;
-  void setVectorLengthParam(Value *);
 
   /// \return whether the vector length param can be ignored.
   bool canIgnoreVectorLengthParam() const;
@@ -154,7 +154,6 @@ void initializeEntryExitInstrumenterPass(PassRegistry&);
 void initializeExpandMemCmpPassPass(PassRegistry&);
 void initializeExpandPostRAPass(PassRegistry&);
 void initializeExpandReductionsPass(PassRegistry&);
-void initializeExpandVectorPredicationPass(PassRegistry &);
 void initializeMakeGuardsExplicitLegacyPassPass(PassRegistry&);
 void initializeExternalAAWrapperPassPass(PassRegistry&);
 void initializeFEntryInserterPass(PassRegistry&);
@@ -197,7 +197,6 @@ namespace {
       (void) llvm::createMergeFunctionsPass();
       (void) llvm::createMergeICmpsLegacyPass();
       (void) llvm::createExpandMemCmpPass();
-      (void) llvm::createExpandVectorPredicationPass();
       std::string buf;
       llvm::raw_string_ostream os(buf);
       (void) llvm::createPrintModulePass(os);
@@ -1026,11 +1026,6 @@ bool TargetTransformInfo::preferPredicatedReductionSelect(
   return TTIImpl->preferPredicatedReductionSelect(Opcode, Ty, Flags);
 }
 
-TargetTransformInfo::VPLegalization
-TargetTransformInfo::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
-  return TTIImpl->getVPLegalizationStrategy(VPI);
-}
-
 bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
   return TTIImpl->shouldExpandReduction(II);
 }
@@ -29,7 +29,6 @@ add_llvm_component_library(LLVMCodeGen
   ExpandMemCmp.cpp
   ExpandPostRAPseudos.cpp
   ExpandReductions.cpp
-  ExpandVectorPredication.cpp
   FaultMaps.cpp
   FEntryInserter.cpp
   FinalizeISel.cpp
@@ -1,469 +0,0 @@
-//===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass implements IR expansion for vector predication intrinsics, allowing
-// targets to enable vector predication until just before codegen.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/ExpandVectorPredication.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-
-using namespace llvm;
-
-using VPLegalization = TargetTransformInfo::VPLegalization;
-using VPTransform = TargetTransformInfo::VPLegalization::VPTransform;
-
-// Keep this in sync with TargetTransformInfo::VPLegalization.
-#define VPINTERNAL_VPLEGAL_CASES \
-  VPINTERNAL_CASE(Legal) \
-  VPINTERNAL_CASE(Discard) \
-  VPINTERNAL_CASE(Convert)
-
-#define VPINTERNAL_CASE(X) "|" #X
-
-// Override options.
-static cl::opt<std::string> EVLTransformOverride(
-    "expandvp-override-evl-transform", cl::init(""), cl::Hidden,
-    cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
-             ". If non-empty, ignore "
-             "TargetTransformInfo and "
-             "always use this transformation for the %evl parameter (Used in "
-             "testing)."));
-
-static cl::opt<std::string> MaskTransformOverride(
-    "expandvp-override-mask-transform", cl::init(""), cl::Hidden,
-    cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
-             ". If non-empty, Ignore "
-             "TargetTransformInfo and "
-             "always use this transformation for the %mask parameter (Used in "
-             "testing)."));
-
-#undef VPINTERNAL_CASE
-#define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X)
-
-static VPTransform parseOverrideOption(const std::string &TextOpt) {
-  return StringSwitch<VPTransform>(TextOpt) VPINTERNAL_VPLEGAL_CASES;
-}
-
-#undef VPINTERNAL_VPLEGAL_CASES
-
-// Whether any override options are set.
-static bool anyExpandVPOverridesSet() {
-  return !EVLTransformOverride.empty() || !MaskTransformOverride.empty();
-}
-
-#define DEBUG_TYPE "expandvp"
-
-STATISTIC(NumFoldedVL, "Number of folded vector length params");
-STATISTIC(NumLoweredVPOps, "Number of folded vector predication operations");
-
-///// Helpers {
-
-/// \returns Whether the vector mask \p MaskVal has all lane bits set.
-static bool isAllTrueMask(Value *MaskVal) {
-  auto *ConstVec = dyn_cast<ConstantVector>(MaskVal);
-  return ConstVec && ConstVec->isAllOnesValue();
-}
-
-/// \returns A non-excepting divisor constant for this type.
-static Constant *getSafeDivisor(Type *DivTy) {
-  assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type");
-  return ConstantInt::get(DivTy, 1u, false);
-}
-
-/// Transfer operation properties from \p OldVPI to \p NewVal.
-static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) {
-  auto *NewInst = dyn_cast<Instruction>(&NewVal);
-  if (!NewInst || !isa<FPMathOperator>(NewVal))
-    return;
-
-  auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI);
-  if (!OldFMOp)
-    return;
-
-  NewInst->setFastMathFlags(OldFMOp->getFastMathFlags());
-}
-
-/// Transfer all properties from \p OldOp to \p NewOp and replace all uses.
-/// OldVP gets erased.
-static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
-  transferDecorations(NewOp, OldOp);
-  OldOp.replaceAllUsesWith(&NewOp);
-  OldOp.eraseFromParent();
-}
-
-//// } Helpers
-
-namespace {
-
-// Expansion pass state at function scope.
-struct CachingVPExpander {
-  Function &F;
-  const TargetTransformInfo &TTI;
-
-  /// \returns A (fixed length) vector with ascending integer indices
-  /// (<0, 1, ..., NumElems-1>).
-  /// \p Builder
-  ///    Used for instruction creation.
-  /// \p LaneTy
-  ///    Integer element type of the result vector.
-  /// \p NumElems
-  ///    Number of vector elements.
-  Value *createStepVector(IRBuilder<> &Builder, Type *LaneTy,
-                          unsigned NumElems);
-
-  /// \returns A bitmask that is true where the lane position is less-than \p
-  /// EVLParam
-  ///
-  /// \p Builder
-  ///    Used for instruction creation.
-  /// \p VLParam
-  ///    The explicit vector length parameter to test against the lane
-  ///    positions.
-  /// \p ElemCount
-  ///    Static (potentially scalable) number of vector elements.
-  Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam,
-                          ElementCount ElemCount);
-
-  Value *foldEVLIntoMask(VPIntrinsic &VPI);
-
-  /// "Remove" the %evl parameter of \p PI by setting it to the static vector
-  /// length of the operation.
-  void discardEVLParameter(VPIntrinsic &PI);
-
-  /// \brief Lower this VP binary operator to a unpredicated binary operator.
-  Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
-                                           VPIntrinsic &PI);
-
-  /// \brief Query TTI and expand the vector predication in \p P accordingly.
-  Value *expandPredication(VPIntrinsic &PI);
-
-  /// \brief Determine how and whether the VPIntrinsic \p VPI shall be
-  /// expanded. This overrides TTI with the cl::opts listed at the top of this
-  /// file.
-  VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const;
-  bool UsingTTIOverrides;
-
-public:
-  CachingVPExpander(Function &F, const TargetTransformInfo &TTI)
-      : F(F), TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {}
-
-  bool expandVectorPredication();
-};
-
-//// CachingVPExpander {
-
-Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy,
-                                           unsigned NumElems) {
-  // TODO add caching
-  SmallVector<Constant *, 16> ConstElems;
-
-  for (unsigned Idx = 0; Idx < NumElems; ++Idx)
-    ConstElems.push_back(ConstantInt::get(LaneTy, Idx, false));
-
-  return ConstantVector::get(ConstElems);
-}
-
-Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
-                                           Value *EVLParam,
-                                           ElementCount ElemCount) {
-  // TODO add caching
-  // Scalable vector %evl conversion.
-  if (ElemCount.isScalable()) {
-    auto *M = Builder.GetInsertBlock()->getModule();
-    Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount);
-    Function *ActiveMaskFunc = Intrinsic::getDeclaration(
-        M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->getType()});
-    // `get_active_lane_mask` performs an implicit less-than comparison.
-    Value *ConstZero = Builder.getInt32(0);
-    return Builder.CreateCall(ActiveMaskFunc, {ConstZero, EVLParam});
-  }
-
-  // Fixed vector %evl conversion.
-  Type *LaneTy = EVLParam->getType();
-  unsigned NumElems = ElemCount.getFixedValue();
-  Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam);
-  Value *IdxVec = createStepVector(Builder, LaneTy, NumElems);
-  return Builder.CreateICmp(CmpInst::ICMP_ULT, IdxVec, VLSplat);
-}
-
-Value *
-CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
-                                                     VPIntrinsic &VPI) {
-  assert((isSafeToSpeculativelyExecute(&VPI) ||
-          VPI.canIgnoreVectorLengthParam()) &&
-         "Implicitly dropping %evl in non-speculatable operator!");
-
-  auto OC = static_cast<Instruction::BinaryOps>(VPI.getFunctionalOpcode());
-  assert(Instruction::isBinaryOp(OC));
-
-  Value *Op0 = VPI.getOperand(0);
-  Value *Op1 = VPI.getOperand(1);
-  Value *Mask = VPI.getMaskParam();
-
-  // Blend in safe operands.
-  if (Mask && !isAllTrueMask(Mask)) {
-    switch (OC) {
-    default:
-      // Can safely ignore the predicate.
-      break;
-
-    // Division operators need a safe divisor on masked-off lanes (1).
-    case Instruction::UDiv:
-    case Instruction::SDiv:
-    case Instruction::URem:
-    case Instruction::SRem:
-      // 2nd operand must not be zero.
-      Value *SafeDivisor = getSafeDivisor(VPI.getType());
-      Op1 = Builder.CreateSelect(Mask, Op1, SafeDivisor);
-    }
-  }
-
-  Value *NewBinOp = Builder.CreateBinOp(OC, Op0, Op1, VPI.getName());
-
-  replaceOperation(*NewBinOp, VPI);
-  return NewBinOp;
-}
-
-void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
-  LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");
-
-  if (VPI.canIgnoreVectorLengthParam())
-    return;
-
-  Value *EVLParam = VPI.getVectorLengthParam();
-  if (!EVLParam)
-    return;
-
-  ElementCount StaticElemCount = VPI.getStaticVectorLength();
-  Value *MaxEVL = nullptr;
-  Type *Int32Ty = Type::getInt32Ty(VPI.getContext());
-  if (StaticElemCount.isScalable()) {
-    // TODO add caching
-    auto *M = VPI.getModule();
-    Function *VScaleFunc =
-        Intrinsic::getDeclaration(M, Intrinsic::vscale, Int32Ty);
-    IRBuilder<> Builder(VPI.getParent(), VPI.getIterator());
-    Value *FactorConst = Builder.getInt32(StaticElemCount.getKnownMinValue());
-    Value *VScale = Builder.CreateCall(VScaleFunc, {}, "vscale");
-    MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size",
-                               /*NUW*/ true, /*NSW*/ false);
-  } else {
-    MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.getFixedValue(), false);
-  }
-  VPI.setVectorLengthParam(MaxEVL);
-}
-
-Value *CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) {
-  LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n');
-
-  IRBuilder<> Builder(&VPI);
-
-  // Ineffective %evl parameter and so nothing to do here.
-  if (VPI.canIgnoreVectorLengthParam())
-    return &VPI;
-
-  // Only VP intrinsics can have an %evl parameter.
-  Value *OldMaskParam = VPI.getMaskParam();
-  Value *OldEVLParam = VPI.getVectorLengthParam();
-  assert(OldMaskParam && "no mask param to fold the vl param into");
-  assert(OldEVLParam && "no EVL param to fold away");
-
-  LLVM_DEBUG(dbgs() << "OLD evl: " << *OldEVLParam << '\n');
-  LLVM_DEBUG(dbgs() << "OLD mask: " << *OldMaskParam << '\n');
-
-  // Convert the %evl predication into vector mask predication.
-  ElementCount ElemCount = VPI.getStaticVectorLength();
-  Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount);
-  Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam);
-  VPI.setMaskParam(NewMaskParam);
-
-  // Drop the %evl parameter.
-  discardEVLParameter(VPI);
-  assert(VPI.canIgnoreVectorLengthParam() &&
-         "transformation did not render the evl param ineffective!");
-
-  // Reassess the modified instruction.
-  return &VPI;
-}
-
-Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
-  LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n');
-
-  IRBuilder<> Builder(&VPI);
-
-  // Try lowering to a LLVM instruction first.
-  unsigned OC = VPI.getFunctionalOpcode();
-
-  if (Instruction::isBinaryOp(OC))
-    return expandPredicationInBinaryOperator(Builder, VPI);
-
-  return &VPI;
-}
-
-//// } CachingVPExpander
-
-struct TransformJob {
-  VPIntrinsic *PI;
-  TargetTransformInfo::VPLegalization Strategy;
-  TransformJob(VPIntrinsic *PI, TargetTransformInfo::VPLegalization InitStrat)
-      : PI(PI), Strategy(InitStrat) {}
-
-  bool isDone() const { return Strategy.shouldDoNothing(); }
-};
-
-void sanitizeStrategy(Instruction &I, VPLegalization &LegalizeStrat) {
-  // Speculatable instructions do not strictly need predication.
-  if (isSafeToSpeculativelyExecute(&I)) {
-    // Converting a speculatable VP intrinsic means dropping %mask and %evl.
-    // No need to expand %evl into the %mask only to ignore that code.
-    if (LegalizeStrat.OpStrategy == VPLegalization::Convert)
-      LegalizeStrat.EVLParamStrategy = VPLegalization::Discard;
-    return;
-  }
-
-  // We have to preserve the predicating effect of %evl for this
-  // non-speculatable VP intrinsic.
-  // 1) Never discard %evl.
-  // 2) If this VP intrinsic will be expanded to non-VP code, make sure that
-  //    %evl gets folded into %mask.
-  if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) ||
-      (LegalizeStrat.OpStrategy == VPLegalization::Convert)) {
-    LegalizeStrat.EVLParamStrategy = VPLegalization::Convert;
-  }
-}
-
-VPLegalization
-CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
-  auto VPStrat = TTI.getVPLegalizationStrategy(VPI);
-  if (LLVM_LIKELY(!UsingTTIOverrides)) {
-    // No overrides - we are in production.
-    return VPStrat;
-  }
-
-  // Overrides set - we are in testing, the following does not need to be
-  // efficient.
-  VPStrat.EVLParamStrategy = parseOverrideOption(EVLTransformOverride);
-  VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride);
-  return VPStrat;
-}
-
-/// \brief Expand llvm.vp.* intrinsics as requested by \p TTI.
-bool CachingVPExpander::expandVectorPredication() {
-  SmallVector<TransformJob, 16> Worklist;
-
-  // Collect all VPIntrinsics that need expansion and determine their expansion
-  // strategy.
-  for (auto &I : instructions(F)) {
-    auto *VPI = dyn_cast<VPIntrinsic>(&I);
-    if (!VPI)
-      continue;
-    auto VPStrat = getVPLegalizationStrategy(*VPI);
-    sanitizeStrategy(I, VPStrat);
-    if (!VPStrat.shouldDoNothing())
-      Worklist.emplace_back(VPI, VPStrat);
-  }
-  if (Worklist.empty())
-    return false;
-
-  // Transform all VPIntrinsics on the worklist.
-  LLVM_DEBUG(dbgs() << "\n:::: Transforming " << Worklist.size()
-                    << " instructions ::::\n");
-  for (TransformJob Job : Worklist) {
-    // Transform the EVL parameter.
-    switch (Job.Strategy.EVLParamStrategy) {
-    case VPLegalization::Legal:
-      break;
-    case VPLegalization::Discard:
-      discardEVLParameter(*Job.PI);
-      break;
-    case VPLegalization::Convert:
-      if (foldEVLIntoMask(*Job.PI))
-        ++NumFoldedVL;
-      break;
-    }
-    Job.Strategy.EVLParamStrategy = VPLegalization::Legal;
-
-    // Replace with a non-predicated operation.
-    switch (Job.Strategy.OpStrategy) {
-    case VPLegalization::Legal:
-      break;
-    case VPLegalization::Discard:
-      llvm_unreachable("Invalid strategy for operators.");
-    case VPLegalization::Convert:
-      expandPredication(*Job.PI);
-      ++NumLoweredVPOps;
-      break;
-    }
-    Job.Strategy.OpStrategy = VPLegalization::Legal;
-
-    assert(Job.isDone() && "incomplete transformation");
-  }
-
-  return true;
-}
-
-class ExpandVectorPredication : public FunctionPass {
-public:
-  static char ID;
-  ExpandVectorPredication() : FunctionPass(ID) {
-    initializeExpandVectorPredicationPass(*PassRegistry::getPassRegistry());
-  }
-
-  bool runOnFunction(Function &F) override {
-    const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
-    CachingVPExpander VPExpander(F, *TTI);
-    return VPExpander.expandVectorPredication();
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<TargetTransformInfoWrapperPass>();
-    AU.setPreservesCFG();
-  }
-};
-} // namespace
-
-char ExpandVectorPredication::ID;
-INITIALIZE_PASS_BEGIN(ExpandVectorPredication, "expandvp",
-                      "Expand vector predication intrinsics", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(ExpandVectorPredication, "expandvp",
-                    "Expand vector predication intrinsics", false, false)
-
-FunctionPass *llvm::createExpandVectorPredicationPass() {
-  return new ExpandVectorPredication();
-}
-
-PreservedAnalyses
-ExpandVectorPredicationPass::run(Function &F, FunctionAnalysisManager &AM) {
-  const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
-  CachingVPExpander VPExpander(F, TTI);
-  if (!VPExpander.expandVectorPredication())
-    return PreservedAnalyses::all();
-  PreservedAnalyses PA;
-  PA.preserveSet<CFGAnalyses>();
-  return PA;
-}
@@ -864,11 +864,6 @@ void TargetPassConfig::addIRPasses() {
   if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining)
     addPass(createPartiallyInlineLibCallsPass());
 
-  // Expand vector predication intrinsics into standard IR instructions.
-  // This pass has to run before ScalarizeMaskedMemIntrin and ExpandReduction
-  // passes since it emits those kinds of intrinsics.
-  addPass(createExpandVectorPredicationPass());
-
   // Add scalarization of target's unsupported masked memory intrinsics pass.
   // the unsupported intrinsic will be replaced with a chain of basic blocks,
   // that stores/loads element one-by-one if the appropriate mask bit is set.
@@ -279,11 +279,6 @@ Value *VPIntrinsic::getMaskParam() const {
   return nullptr;
 }
 
-void VPIntrinsic::setMaskParam(Value *NewMask) {
-  auto MaskPos = GetMaskParamPos(getIntrinsicID());
-  setArgOperand(*MaskPos, NewMask);
-}
-
 Value *VPIntrinsic::getVectorLengthParam() const {
   auto vlenPos = GetVectorLengthParamPos(getIntrinsicID());
   if (vlenPos)
@@ -291,11 +286,6 @@ Value *VPIntrinsic::getVectorLengthParam() const {
  return nullptr;
 }
 
-void VPIntrinsic::setVectorLengthParam(Value *NewEVL) {
-  auto EVLPos = GetVectorLengthParamPos(getIntrinsicID());
-  setArgOperand(*EVLPos, NewEVL);
-}
-
 Optional<int> VPIntrinsic::GetMaskParamPos(Intrinsic::ID IntrinsicID) {
   switch (IntrinsicID) {
   default:
@@ -21,7 +21,6 @@
 ; CHECK-NEXT: Shadow Stack GC Lowering
 ; CHECK-NEXT: Lower constant intrinsics
 ; CHECK-NEXT: Remove unreachable blocks from the CFG
-; CHECK-NEXT: Expand vector predication intrinsics
 ; CHECK-NEXT: Scalarize Masked Memory Intrinsics
 ; CHECK-NEXT: Expand reduction intrinsics
 ; CHECK-NEXT: AArch64 Stack Tagging
@@ -56,7 +56,6 @@
 ; CHECK-NEXT: Constant Hoisting
 ; CHECK-NEXT: Replace intrinsics with calls to vector library
 ; CHECK-NEXT: Partially inline calls to library functions
-; CHECK-NEXT: Expand vector predication intrinsics
 ; CHECK-NEXT: Scalarize Masked Memory Intrinsics
 ; CHECK-NEXT: Expand reduction intrinsics
 ; CHECK-NEXT: Stack Safety Analysis
@@ -37,7 +37,6 @@
 ; CHECK-NEXT: Constant Hoisting
 ; CHECK-NEXT: Replace intrinsics with calls to vector library
 ; CHECK-NEXT: Partially inline calls to library functions
-; CHECK-NEXT: Expand vector predication intrinsics
 ; CHECK-NEXT: Scalarize Masked Memory Intrinsics
 ; CHECK-NEXT: Expand reduction intrinsics
 ; CHECK-NEXT: Natural Loop Information
@ -1,245 +0,0 @@
|
||||||
; Partial expansion cases (still VP with parameter expansions).
|
|
||||||
; RUN: opt --expandvp --expandvp-override-evl-transform=Legal --expandvp-override-mask-transform=Legal -S < %s | FileCheck %s --check-prefix=LEGAL_LEGAL
|
|
||||||
; RUN: opt --expandvp --expandvp-override-evl-transform=Discard --expandvp-override-mask-transform=Legal -S < %s | FileCheck %s --check-prefix=DISCARD_LEGAL
|
|
||||||
; RUN: opt --expandvp --expandvp-override-evl-transform=Convert --expandvp-override-mask-transform=Legal -S < %s | FileCheck %s --check-prefix=CONVERT_LEGAL
|
|
||||||
; Full expansion cases (all expanded to non-VP).
|
|
||||||
; RUN: opt --expandvp --expandvp-override-evl-transform=Discard --expandvp-override-mask-transform=Convert -S < %s | FileCheck %s --check-prefix=ALL-CONVERT
|
|
||||||
; RUN: opt --expandvp -S < %s | FileCheck %s --check-prefix=ALL-CONVERT
|
|
||||||
; RUN: opt --expandvp --expandvp-override-evl-transform=Legal --expandvp-override-mask-transform=Convert -S < %s | FileCheck %s --check-prefix=ALL-CONVERT
|
|
||||||
; RUN: opt --expandvp --expandvp-override-evl-transform=Convert --expandvp-override-mask-transform=Convert -S < %s | FileCheck %s --check-prefix=ALL-CONVERT
|
|
||||||
|
|
||||||
|
|
||||||
; Fixed-width vectors
|
|
||||||
; Integer arith
|
|
||||||
declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
|
||||||
declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
|
||||||
declare <8 x i32> @llvm.vp.mul.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
|
||||||
declare <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
|
||||||
declare <8 x i32> @llvm.vp.srem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
|
||||||
declare <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
|
||||||
declare <8 x i32> @llvm.vp.urem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
|
||||||
; Bit arith
|
|
||||||
declare <8 x i32> @llvm.vp.and.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
|
||||||
declare <8 x i32> @llvm.vp.xor.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
|
||||||
declare <8 x i32> @llvm.vp.or.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
|
||||||
declare <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
|
||||||
declare <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
|
||||||
declare <8 x i32> @llvm.vp.shl.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
|
|
||||||
|
|
||||||
; Fixed vector test function.
|
|
||||||
define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
|
|
||||||
%r0 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
%r1 = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
%r2 = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
%r3 = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
%r4 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
%r5 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
%r6 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
%r7 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
%r8 = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
%r9 = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
%rA = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
%rB = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
%rC = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
; Scalable-width vectors
|
|
||||||
; Integer arith
|
|
||||||
declare <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
|
||||||
declare <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
|
||||||
declare <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
|
||||||
declare <vscale x 4 x i32> @llvm.vp.sdiv.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
|
||||||
declare <vscale x 4 x i32> @llvm.vp.srem.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
|
||||||
declare <vscale x 4 x i32> @llvm.vp.udiv.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
|
||||||
declare <vscale x 4 x i32> @llvm.vp.urem.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
|
||||||
; Bit arith
|
|
||||||
declare <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
|
||||||
declare <vscale x 4 x i32> @llvm.vp.xor.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
|
||||||
declare <vscale x 4 x i32> @llvm.vp.or.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
|
||||||
declare <vscale x 4 x i32> @llvm.vp.ashr.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
|
||||||
declare <vscale x 4 x i32> @llvm.vp.lshr.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
|
||||||
declare <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
|
|
||||||
|
|
||||||
; Scalable vector test function.
|
|
||||||
define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i32> %i2, <vscale x 4 x i32> %f3, <vscale x 4 x i1> %m, i32 %n) {
|
|
||||||
%r0 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
%r1 = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
%r2 = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
%r3 = call <vscale x 4 x i32> @llvm.vp.sdiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
%r4 = call <vscale x 4 x i32> @llvm.vp.srem.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
%r5 = call <vscale x 4 x i32> @llvm.vp.udiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
%r6 = call <vscale x 4 x i32> @llvm.vp.urem.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
%r7 = call <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
%r8 = call <vscale x 4 x i32> @llvm.vp.or.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
%r9 = call <vscale x 4 x i32> @llvm.vp.xor.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
%rA = call <vscale x 4 x i32> @llvm.vp.ashr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
%rB = call <vscale x 4 x i32> @llvm.vp.lshr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
%rC = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
; All VP intrinsics have to be lowered into non-VP ops
|
|
||||||
; Convert %evl into %mask for non-speculatable VP intrinsics and emit the
|
|
||||||
; instruction+select idiom with a non-VP SIMD instruction.
|
|
||||||
;
|
|
||||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.add}}
|
|
||||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.sub}}
|
|
||||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.mul}}
|
|
||||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.sdiv}}
|
|
||||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.srem}}
|
|
||||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.udiv}}
|
|
||||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.urem}}
|
|
||||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.and}}
|
|
||||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.or}}
|
|
||||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.xor}}
|
|
||||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.ashr}}
|
|
||||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.lshr}}
|
|
||||||
; ALL-CONVERT-NOT: {{call.* @llvm.vp.shl}}
|
|
||||||
;
|
|
||||||
; ALL-CONVERT: define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
|
|
||||||
; ALL-CONVERT-NEXT: %{{.*}} = add <8 x i32> %i0, %i1
|
|
||||||
; ALL-CONVERT-NEXT: %{{.*}} = sub <8 x i32> %i0, %i1
|
|
||||||
; ALL-CONVERT-NEXT: %{{.*}} = mul <8 x i32> %i0, %i1
|
|
||||||
; ALL-CONVERT-NEXT: [[NINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i32 0
|
|
||||||
; ALL-CONVERT-NEXT: [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NINS]], <8 x i32> poison, <8 x i32> zeroinitializer
|
|
||||||
; ALL-CONVERT-NEXT: [[EVLM:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT]]
|
|
||||||
; ALL-CONVERT-NEXT: [[NEWM:%.+]] = and <8 x i1> [[EVLM]], %m
|
|
||||||
; ALL-CONVERT-NEXT: [[SELONE:%.+]] = select <8 x i1> [[NEWM]], <8 x i32> %i1, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
|
||||||
; ALL-CONVERT-NEXT: %{{.+}} = sdiv <8 x i32> %i0, [[SELONE]]
|
|
||||||
; ALL-CONVERT-NOT: %{{.+}} = srem <8 x i32> %i0, %i1
|
|
||||||
; ALL-CONVERT: %{{.+}} = srem <8 x i32> %i0, %{{.+}}
|
|
||||||
; ALL-CONVERT-NOT: %{{.+}} = udiv <8 x i32> %i0, %i1
|
|
||||||
; ALL-CONVERT: %{{.+}} = udiv <8 x i32> %i0, %{{.+}}
|
|
||||||
; ALL-CONVERT-NOT: %{{.+}} = urem <8 x i32> %i0, %i1
|
|
||||||
; ALL-CONVERT: %{{.+}} = urem <8 x i32> %i0, %{{.+}}
|
|
||||||
; ALL-CONVERT-NEXT: %{{.+}} = and <8 x i32> %i0, %i1
|
|
||||||
; ALL-CONVERT-NEXT: %{{.+}} = or <8 x i32> %i0, %i1
|
|
||||||
; ALL-CONVERT-NEXT: %{{.+}} = xor <8 x i32> %i0, %i1
|
|
||||||
; ALL-CONVERT-NEXT: %{{.+}} = ashr <8 x i32> %i0, %i1
|
|
||||||
; ALL-CONVERT-NEXT: %{{.+}} = lshr <8 x i32> %i0, %i1
|
|
||||||
; ALL-CONVERT-NEXT: %{{.+}} = shl <8 x i32> %i0, %i1
|
|
||||||
; ALL-CONVERT: ret void
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
; All legal - don't transform anything.
|
|
||||||
|
|
||||||
; LEGAL_LEGAL: define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
|
|
||||||
; LEGAL_LEGAL-NEXT: %r0 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %r1 = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %r2 = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %r3 = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %r4 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %r5 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %r6 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %r7 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %r8 = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %r9 = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %rA = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %rB = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %rC = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: ret void
|
|
||||||
|
|
||||||
; LEGAL_LEGAL:define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i32> %i2, <vscale x 4 x i32> %f3, <vscale x 4 x i1> %m, i32 %n) {
|
|
||||||
; LEGAL_LEGAL-NEXT: %r0 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %r1 = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %r2 = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %r3 = call <vscale x 4 x i32> @llvm.vp.sdiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %r4 = call <vscale x 4 x i32> @llvm.vp.srem.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %r5 = call <vscale x 4 x i32> @llvm.vp.udiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %r6 = call <vscale x 4 x i32> @llvm.vp.urem.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %r7 = call <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %r8 = call <vscale x 4 x i32> @llvm.vp.or.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %r9 = call <vscale x 4 x i32> @llvm.vp.xor.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %rA = call <vscale x 4 x i32> @llvm.vp.ashr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %rB = call <vscale x 4 x i32> @llvm.vp.lshr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: %rC = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
|
|
||||||
; LEGAL_LEGAL-NEXT: ret void
|
|
||||||
|
|
||||||
|
|
||||||
; Drop %evl where possible else fold %evl into %mask (%evl Discard, %mask Legal)
|
|
||||||
;
|
|
||||||
; There is no caching yet in the ExpandVectorPredication pass and the %evl
|
|
||||||
; expansion code is emitted for every non-speculatable intrinsic again. Hence,
|
|
||||||
; only check that..
|
|
||||||
; (1) The %evl folding code and %mask are correct for the first
|
|
||||||
; non-speculatable VP intrinsic.
|
|
||||||
; (2) All other non-speculatable VP intrinsics have a modified mask argument.
|
|
||||||
; (3) All speculatable VP intrinsics keep their %mask and %evl.
|
|
||||||
; (4) All VP intrinsics have an ineffective %evl parameter.
|
|
||||||
|
|
||||||
; DISCARD_LEGAL: define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
|
|
||||||
; DISCARD_LEGAL-NEXT: %r0 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; DISCARD_LEGAL-NEXT: %r1 = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; DISCARD_LEGAL-NEXT: %r2 = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; DISCARD_LEGAL-NEXT: [[NSPLATINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i32 0
|
|
||||||
; DISCARD_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NSPLATINS]], <8 x i32> poison, <8 x i32> zeroinitializer
|
|
||||||
; DISCARD_LEGAL-NEXT: [[EVLMASK:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT]]
|
|
||||||
; DISCARD_LEGAL-NEXT: [[NEWMASK:%.+]] = and <8 x i1> [[EVLMASK]], %m
|
|
||||||
; DISCARD_LEGAL-NEXT: %r3 = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> [[NEWMASK]], i32 8)
|
|
||||||
; DISCARD_LEGAL-NOT: %r4 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; DISCARD_LEGAL-NOT: %r5 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; DISCARD_LEGAL-NOT: %r6 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; DISCARD_LEGAL: %r7 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; DISCARD_LEGAL-NEXT: %r8 = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; DISCARD_LEGAL-NEXT: %r9 = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; DISCARD_LEGAL-NEXT: %rA = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; DISCARD_LEGAL-NEXT: %rB = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; DISCARD_LEGAL-NEXT: %rC = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; DISCARD_LEGAL-NEXT: ret void
|
|
||||||
|
|
||||||
; TODO compute vscale only once and use caching.
|
|
||||||
; In the meantime, we only check for the correct vscale code for the first VP
|
|
||||||
; intrinsic and skip over it for all others.
|
|
||||||
|
|
||||||
; DISCARD_LEGAL: define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i32> %i2, <vscale x 4 x i32> %f3, <vscale x 4 x i1> %m, i32 %n) {
|
|
||||||
; DISCARD_LEGAL-NEXT: %vscale = call i32 @llvm.vscale.i32()
|
|
||||||
; DISCARD_LEGAL-NEXT: %scalable_size = mul nuw i32 %vscale, 4
|
|
||||||
; DISCARD_LEGAL-NEXT: %r0 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %scalable_size)
|
|
||||||
; DISCARD_LEGAL: %r1 = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %scalable_size{{.*}})
|
|
||||||
; DISCARD_LEGAL: %r2 = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %scalable_size{{.*}})
|
|
||||||
; DISCARD_LEGAL: [[EVLM:%.+]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 %n)
|
|
||||||
; DISCARD_LEGAL: [[NEWM:%.+]] = and <vscale x 4 x i1> [[EVLM]], %m
|
|
||||||
; DISCARD_LEGAL: %r3 = call <vscale x 4 x i32> @llvm.vp.sdiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> [[NEWM]], i32 %scalable_size{{.*}})
|
|
||||||
; DISCARD_LEGAL-NOT: %{{.+}} = call <vscale x 4 x i32> @llvm.vp.{{.*}}, i32 %n)
|
|
||||||
; DISCARD_LEGAL: ret void
|
|
||||||
|
|
||||||
|
|
||||||
; Convert %evl into %mask everywhere (%evl Convert, %mask Legal)
|
|
||||||
;
|
|
||||||
; For the same reasons as in the (%evl Discard, %mask Legal) case only check that..
|
|
||||||
; (1) The %evl folding code and %mask are correct for the first VP intrinsic.
|
|
||||||
; (2) All other VP intrinsics have a modified mask argument.
|
|
||||||
; (3) All VP intrinsics have an ineffective %evl parameter.
|
|
||||||
;
|
|
||||||
; CONVERT_LEGAL: define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
|
|
||||||
; CONVERT_LEGAL-NEXT: [[NINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i32 0
|
|
||||||
; CONVERT_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NINS]], <8 x i32> poison, <8 x i32> zeroinitializer
|
|
||||||
; CONVERT_LEGAL-NEXT: [[EVLM:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT]]
|
|
||||||
; CONVERT_LEGAL-NEXT: [[NEWM:%.+]] = and <8 x i1> [[EVLM]], %m
|
|
||||||
; CONVERT_LEGAL-NEXT: %{{.+}} = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> [[NEWM]], i32 8)
|
|
||||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
|
|
||||||
; CONVERT_LEGAL: ret void
|
|
||||||
|
|
||||||
; Similar to %evl discard, %mask legal but make sure the first VP intrinsic has a legal expansion
|
|
||||||
; CONVERT_LEGAL: define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i32> %i2, <vscale x 4 x i32> %f3, <vscale x 4 x i1> %m, i32 %n) {
|
|
||||||
; CONVERT_LEGAL-NEXT: [[EVLM:%.+]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 %n)
|
|
||||||
; CONVERT_LEGAL-NEXT: [[NEWM:%.+]] = and <vscale x 4 x i1> [[EVLM]], %m
|
|
||||||
; CONVERT_LEGAL-NEXT: %vscale = call i32 @llvm.vscale.i32()
|
|
||||||
; CONVERT_LEGAL-NEXT: %scalable_size = mul nuw i32 %vscale, 4
|
|
||||||
; CONVERT_LEGAL-NEXT: %r0 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> [[NEWM]], i32 %scalable_size)
|
|
||||||
; CONVERT_LEGAL-NOT: %{{.*}} = call <vscale x 4 x i32> @llvm.vp.{{.*}}, i32 %n)
|
|
||||||
; CONVERT_LEGAL: ret void
|
|
||||||
|
|
|
@@ -25,7 +25,6 @@
 ; CHECK-NEXT: Shadow Stack GC Lowering
 ; CHECK-NEXT: Lower constant intrinsics
 ; CHECK-NEXT: Remove unreachable blocks from the CFG
-; CHECK-NEXT: Expand vector predication intrinsics
 ; CHECK-NEXT: Scalarize Masked Memory Intrinsics
 ; CHECK-NEXT: Expand reduction intrinsics
 ; CHECK-NEXT: Expand indirectbr instructions
@@ -54,7 +54,6 @@
 ; CHECK-NEXT: Constant Hoisting
 ; CHECK-NEXT: Replace intrinsics with calls to vector library
 ; CHECK-NEXT: Partially inline calls to library functions
-; CHECK-NEXT: Expand vector predication intrinsics
 ; CHECK-NEXT: Scalarize Masked Memory Intrinsics
 ; CHECK-NEXT: Expand reduction intrinsics
 ; CHECK-NEXT: Interleaved Access Pass
@@ -352,7 +352,6 @@ int main(int argc, char **argv) {
   initializeVectorization(*Registry);
   initializeScalarizeMaskedMemIntrinLegacyPassPass(*Registry);
   initializeExpandReductionsPass(*Registry);
-  initializeExpandVectorPredicationPass(*Registry);
   initializeHardwareLoopsPass(*Registry);
   initializeTransformUtils(*Registry);
   initializeReplaceWithVeclibLegacyPass(*Registry);
@@ -513,7 +513,7 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
       "safe-stack", "cost-model",
       "codegenprepare", "interleaved-load-combine",
      "unreachableblockelim", "verify-safepoint-ir",
-      "atomic-expand", "expandvp",
+      "atomic-expand",
       "hardware-loops", "type-promotion",
       "mve-tail-predication", "interleaved-access",
       "global-merge", "pre-isel-intrinsic-lowering",
@@ -591,7 +591,6 @@ int main(int argc, char **argv) {
   initializePostInlineEntryExitInstrumenterPass(Registry);
   initializeUnreachableBlockElimLegacyPassPass(Registry);
   initializeExpandReductionsPass(Registry);
-  initializeExpandVectorPredicationPass(Registry);
   initializeWasmEHPreparePass(Registry);
   initializeWriteBitcodePassPass(Registry);
   initializeHardwareLoopsPass(Registry);