forked from OSchip/llvm-project
Reland "[LV] Calculate max feasible scalable VF."
Relands https://reviews.llvm.org/D98509
This reverts commit 51d648c119.
This commit is contained in:
parent
bfb9c749c0
commit
9931ae645e
|
@ -174,6 +174,13 @@ void reportVectorizationFailure(const StringRef DebugMsg,
|
|||
const StringRef OREMsg, const StringRef ORETag,
|
||||
OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr);
|
||||
|
||||
/// Reports an informative message: print \p Msg for debugging purposes as well
|
||||
/// as an optimization remark. Uses either \p I as location of the remark, or
|
||||
/// otherwise \p TheLoop.
|
||||
void reportVectorizationInfo(const StringRef OREMsg, const StringRef ORETag,
|
||||
OptimizationRemarkEmitter *ORE, Loop *TheLoop,
|
||||
Instruction *I = nullptr);
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H
|
||||
|
|
|
@ -1065,13 +1065,13 @@ void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr)
|
|||
B.SetCurrentDebugLocation(DebugLoc());
|
||||
}
|
||||
|
||||
/// Write a record \p DebugMsg about vectorization failure to the debug
|
||||
/// output stream. If \p I is passed, it is an instruction that prevents
|
||||
/// vectorization.
|
||||
/// Write a \p DebugMsg about vectorization to the debug output stream. If \p I
|
||||
/// is passed, the message relates to that particular instruction.
|
||||
#ifndef NDEBUG
|
||||
static void debugVectorizationFailure(const StringRef DebugMsg,
|
||||
Instruction *I) {
|
||||
dbgs() << "LV: Not vectorizing: " << DebugMsg;
|
||||
static void debugVectorizationMessage(const StringRef Prefix,
|
||||
const StringRef DebugMsg,
|
||||
Instruction *I) {
|
||||
dbgs() << "LV: " << Prefix << DebugMsg;
|
||||
if (I != nullptr)
|
||||
dbgs() << " " << *I;
|
||||
else
|
||||
|
@ -1100,9 +1100,7 @@ static OptimizationRemarkAnalysis createLVAnalysis(const char *PassName,
|
|||
DL = I->getDebugLoc();
|
||||
}
|
||||
|
||||
OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion);
|
||||
R << "loop not vectorized: ";
|
||||
return R;
|
||||
return OptimizationRemarkAnalysis(PassName, RemarkName, DL, CodeRegion);
|
||||
}
|
||||
|
||||
/// Return a value for Step multiplied by VF.
|
||||
|
@ -1123,12 +1121,24 @@ Value *getRuntimeVF(IRBuilder<> &B, Type *Ty, ElementCount VF) {
|
|||
}
|
||||
|
||||
void reportVectorizationFailure(const StringRef DebugMsg,
|
||||
const StringRef OREMsg, const StringRef ORETag,
|
||||
OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I) {
|
||||
LLVM_DEBUG(debugVectorizationFailure(DebugMsg, I));
|
||||
const StringRef OREMsg, const StringRef ORETag,
|
||||
OptimizationRemarkEmitter *ORE, Loop *TheLoop,
|
||||
Instruction *I) {
|
||||
LLVM_DEBUG(debugVectorizationMessage("Not vectorizing: ", DebugMsg, I));
|
||||
LoopVectorizeHints Hints(TheLoop, true /* doesn't matter */, *ORE);
|
||||
ORE->emit(createLVAnalysis(Hints.vectorizeAnalysisPassName(),
|
||||
ORETag, TheLoop, I) << OREMsg);
|
||||
ORE->emit(
|
||||
createLVAnalysis(Hints.vectorizeAnalysisPassName(), ORETag, TheLoop, I)
|
||||
<< "loop not vectorized: " << OREMsg);
|
||||
}
|
||||
|
||||
void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag,
|
||||
OptimizationRemarkEmitter *ORE, Loop *TheLoop,
|
||||
Instruction *I) {
|
||||
LLVM_DEBUG(debugVectorizationMessage("", Msg, I));
|
||||
LoopVectorizeHints Hints(TheLoop, true /* doesn't matter */, *ORE);
|
||||
ORE->emit(
|
||||
createLVAnalysis(Hints.vectorizeAnalysisPassName(), ORETag, TheLoop, I)
|
||||
<< Msg);
|
||||
}
|
||||
|
||||
} // end namespace llvm
|
||||
|
@ -1623,6 +1633,23 @@ private:
|
|||
ElementCount computeFeasibleMaxVF(unsigned ConstTripCount,
|
||||
ElementCount UserVF);
|
||||
|
||||
/// \return the maximized element count based on the target's vector
|
||||
/// registers and the loop trip-count, but limited to a maximum safe VF.
|
||||
/// This is a helper function of computeFeasibleMaxVF.
|
||||
/// FIXME: MaxSafeVF is currently passed by reference to avoid some obscure
|
||||
/// issue that occurred on one of the buildbots which cannot be reproduced
|
||||
/// without having access to the proprietary compiler (see comments on
|
||||
/// D98509). The issue is currently under investigation and this workaround
|
||||
/// will be removed as soon as possible.
|
||||
ElementCount getMaximizedVFForTarget(unsigned ConstTripCount,
|
||||
unsigned SmallestType,
|
||||
unsigned WidestType,
|
||||
const ElementCount &MaxSafeVF);
|
||||
|
||||
/// \return the maximum legal scalable VF, based on the safe max number
|
||||
/// of elements.
|
||||
ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements);
|
||||
|
||||
/// The vectorization cost is a combination of the cost itself and a boolean
|
||||
/// indicating whether any of the contributing operations will actually
|
||||
/// operate on
|
||||
|
@ -5582,6 +5609,130 @@ bool LoopVectorizationCostModel::runtimeChecksRequired() {
|
|||
return false;
|
||||
}
|
||||
|
||||
ElementCount
|
||||
LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
|
||||
if (!TTI.supportsScalableVectors() && !ForceTargetSupportsScalableVectors) {
|
||||
reportVectorizationInfo(
|
||||
"Disabling scalable vectorization, because target does not "
|
||||
"support scalable vectors.",
|
||||
"ScalableVectorsUnsupported", ORE, TheLoop);
|
||||
return ElementCount::getScalable(0);
|
||||
}
|
||||
|
||||
auto MaxScalableVF = ElementCount::getScalable(
|
||||
std::numeric_limits<ElementCount::ScalarTy>::max());
|
||||
|
||||
// Disable scalable vectorization if the loop contains unsupported reductions.
|
||||
// Test that the loop-vectorizer can legalize all operations for this MaxVF.
|
||||
// FIXME: While for scalable vectors this is currently sufficient, this should
|
||||
// be replaced by a more detailed mechanism that filters out specific VFs,
|
||||
// instead of invalidating vectorization for a whole set of VFs based on the
|
||||
// MaxVF.
|
||||
if (!canVectorizeReductions(MaxScalableVF)) {
|
||||
reportVectorizationInfo(
|
||||
"Scalable vectorization not supported for the reduction "
|
||||
"operations found in this loop.",
|
||||
"ScalableVFUnfeasible", ORE, TheLoop);
|
||||
return ElementCount::getScalable(0);
|
||||
}
|
||||
|
||||
if (Legal->isSafeForAnyVectorWidth())
|
||||
return MaxScalableVF;
|
||||
|
||||
// Limit MaxScalableVF by the maximum safe dependence distance.
|
||||
Optional<unsigned> MaxVScale = TTI.getMaxVScale();
|
||||
MaxScalableVF = ElementCount::getScalable(
|
||||
MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
|
||||
if (!MaxScalableVF)
|
||||
reportVectorizationInfo(
|
||||
"Max legal vector width too small, scalable vectorization "
|
||||
"unfeasible.",
|
||||
"ScalableVFUnfeasible", ORE, TheLoop);
|
||||
|
||||
return MaxScalableVF;
|
||||
}
|
||||
|
||||
ElementCount
|
||||
LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount,
|
||||
ElementCount UserVF) {
|
||||
MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
|
||||
unsigned SmallestType, WidestType;
|
||||
std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
|
||||
|
||||
// Get the maximum safe dependence distance in bits computed by LAA.
|
||||
// It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
|
||||
// the memory access that is most restrictive (involved in the smallest
|
||||
// dependence distance).
|
||||
unsigned MaxSafeElements =
|
||||
PowerOf2Floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
|
||||
|
||||
auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElements);
|
||||
auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElements);
|
||||
|
||||
LLVM_DEBUG(dbgs() << "LV: The max safe fixed VF is: " << MaxSafeFixedVF
|
||||
<< ".\n");
|
||||
LLVM_DEBUG(dbgs() << "LV: The max safe scalable VF is: " << MaxSafeScalableVF
|
||||
<< ".\n");
|
||||
|
||||
// First analyze the UserVF, fall back if the UserVF should be ignored.
|
||||
if (UserVF) {
|
||||
auto MaxSafeUserVF =
|
||||
UserVF.isScalable() ? MaxSafeScalableVF : MaxSafeFixedVF;
|
||||
|
||||
if (ElementCount::isKnownLE(UserVF, MaxSafeUserVF))
|
||||
return UserVF;
|
||||
|
||||
assert(ElementCount::isKnownGT(UserVF, MaxSafeUserVF));
|
||||
|
||||
// Only clamp if the UserVF is not scalable. If the UserVF is scalable, it
|
||||
// is better to ignore the hint and let the compiler choose a suitable VF.
|
||||
if (!UserVF.isScalable()) {
|
||||
LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
|
||||
<< " is unsafe, clamping to max safe VF="
|
||||
<< MaxSafeFixedVF << ".\n");
|
||||
ORE->emit([&]() {
|
||||
return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
|
||||
TheLoop->getStartLoc(),
|
||||
TheLoop->getHeader())
|
||||
<< "User-specified vectorization factor "
|
||||
<< ore::NV("UserVectorizationFactor", UserVF)
|
||||
<< " is unsafe, clamping to maximum safe vectorization factor "
|
||||
<< ore::NV("VectorizationFactor", MaxSafeFixedVF);
|
||||
});
|
||||
return MaxSafeFixedVF;
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
|
||||
<< " is unsafe. Ignoring scalable UserVF.\n");
|
||||
ORE->emit([&]() {
|
||||
return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
|
||||
TheLoop->getStartLoc(),
|
||||
TheLoop->getHeader())
|
||||
<< "User-specified vectorization factor "
|
||||
<< ore::NV("UserVectorizationFactor", UserVF)
|
||||
<< " is unsafe. Ignoring the hint to let the compiler pick a "
|
||||
"suitable VF.";
|
||||
});
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType
|
||||
<< " / " << WidestType << " bits.\n");
|
||||
|
||||
ElementCount MaxFixedVF = ElementCount::getFixed(1);
|
||||
if (auto MaxVF = getMaximizedVFForTarget(ConstTripCount, SmallestType,
|
||||
WidestType, MaxSafeFixedVF))
|
||||
MaxFixedVF = MaxVF;
|
||||
|
||||
if (auto MaxVF = getMaximizedVFForTarget(ConstTripCount, SmallestType,
|
||||
WidestType, MaxSafeScalableVF))
|
||||
// FIXME: Return scalable VF as well (to be added in future patch).
|
||||
if (MaxVF.isScalable())
|
||||
LLVM_DEBUG(dbgs() << "LV: Found feasible scalable VF = " << MaxVF
|
||||
<< "\n");
|
||||
|
||||
return MaxFixedVF;
|
||||
}
|
||||
|
||||
Optional<ElementCount>
|
||||
LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
|
||||
if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) {
|
||||
|
@ -5722,149 +5873,61 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
|
|||
return None;
|
||||
}
|
||||
|
||||
ElementCount
|
||||
LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount,
|
||||
ElementCount UserVF) {
|
||||
bool IgnoreScalableUserVF = UserVF.isScalable() &&
|
||||
!TTI.supportsScalableVectors() &&
|
||||
!ForceTargetSupportsScalableVectors;
|
||||
if (IgnoreScalableUserVF) {
|
||||
LLVM_DEBUG(
|
||||
dbgs() << "LV: Ignoring VF=" << UserVF
|
||||
<< " because target does not support scalable vectors.\n");
|
||||
ORE->emit([&]() {
|
||||
return OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreScalableUserVF",
|
||||
TheLoop->getStartLoc(),
|
||||
TheLoop->getHeader())
|
||||
<< "Ignoring VF=" << ore::NV("UserVF", UserVF)
|
||||
<< " because target does not support scalable vectors.";
|
||||
});
|
||||
}
|
||||
ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
|
||||
unsigned ConstTripCount, unsigned SmallestType, unsigned WidestType,
|
||||
const ElementCount &MaxSafeVF) {
|
||||
bool ComputeScalableMaxVF = MaxSafeVF.isScalable();
|
||||
TypeSize WidestRegister = TTI.getRegisterBitWidth(
|
||||
ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
|
||||
: TargetTransformInfo::RGK_FixedWidthVector);
|
||||
|
||||
// Beyond this point two scenarios are handled. If UserVF isn't specified
|
||||
// then a suitable VF is chosen. If UserVF is specified and there are
|
||||
// dependencies, check if it's legal. However, if a UserVF is specified and
|
||||
// there are no dependencies, then there's nothing to do.
|
||||
if (UserVF.isNonZero() && !IgnoreScalableUserVF) {
|
||||
if (!canVectorizeReductions(UserVF)) {
|
||||
reportVectorizationFailure(
|
||||
"LV: Scalable vectorization not supported for the reduction "
|
||||
"operations found in this loop. Using fixed-width "
|
||||
"vectorization instead.",
|
||||
"Scalable vectorization not supported for the reduction operations "
|
||||
"found in this loop. Using fixed-width vectorization instead.",
|
||||
"ScalableVFUnfeasible", ORE, TheLoop);
|
||||
return computeFeasibleMaxVF(
|
||||
ConstTripCount, ElementCount::getFixed(UserVF.getKnownMinValue()));
|
||||
}
|
||||
|
||||
if (Legal->isSafeForAnyVectorWidth())
|
||||
return UserVF;
|
||||
}
|
||||
|
||||
MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
|
||||
unsigned SmallestType, WidestType;
|
||||
std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
|
||||
unsigned WidestRegister =
|
||||
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
|
||||
.getFixedSize();
|
||||
|
||||
// Get the maximum safe dependence distance in bits computed by LAA.
|
||||
// It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
|
||||
// the memory accesses that is most restrictive (involved in the smallest
|
||||
// dependence distance).
|
||||
unsigned MaxSafeVectorWidthInBits = Legal->getMaxSafeVectorWidthInBits();
|
||||
|
||||
// If the user vectorization factor is legally unsafe, clamp it to a safe
|
||||
// value. Otherwise, return as is.
|
||||
if (UserVF.isNonZero() && !IgnoreScalableUserVF) {
|
||||
unsigned MaxSafeElements =
|
||||
PowerOf2Floor(MaxSafeVectorWidthInBits / WidestType);
|
||||
ElementCount MaxSafeVF = ElementCount::getFixed(MaxSafeElements);
|
||||
|
||||
if (UserVF.isScalable()) {
|
||||
Optional<unsigned> MaxVScale = TTI.getMaxVScale();
|
||||
|
||||
// Scale VF by vscale before checking if it's safe.
|
||||
MaxSafeVF = ElementCount::getScalable(
|
||||
MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
|
||||
|
||||
if (MaxSafeVF.isZero()) {
|
||||
// The dependence distance is too small to use scalable vectors,
|
||||
// fallback on fixed.
|
||||
LLVM_DEBUG(
|
||||
dbgs()
|
||||
<< "LV: Max legal vector width too small, scalable vectorization "
|
||||
"unfeasible. Using fixed-width vectorization instead.\n");
|
||||
ORE->emit([&]() {
|
||||
return OptimizationRemarkAnalysis(DEBUG_TYPE, "ScalableVFUnfeasible",
|
||||
TheLoop->getStartLoc(),
|
||||
TheLoop->getHeader())
|
||||
<< "Max legal vector width too small, scalable vectorization "
|
||||
<< "unfeasible. Using fixed-width vectorization instead.";
|
||||
});
|
||||
return computeFeasibleMaxVF(
|
||||
ConstTripCount, ElementCount::getFixed(UserVF.getKnownMinValue()));
|
||||
}
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "LV: The max safe VF is: " << MaxSafeVF << ".\n");
|
||||
|
||||
if (ElementCount::isKnownLE(UserVF, MaxSafeVF))
|
||||
return UserVF;
|
||||
|
||||
LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
|
||||
<< " is unsafe, clamping to max safe VF=" << MaxSafeVF
|
||||
<< ".\n");
|
||||
ORE->emit([&]() {
|
||||
return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
|
||||
TheLoop->getStartLoc(),
|
||||
TheLoop->getHeader())
|
||||
<< "User-specified vectorization factor "
|
||||
<< ore::NV("UserVectorizationFactor", UserVF)
|
||||
<< " is unsafe, clamping to maximum safe vectorization factor "
|
||||
<< ore::NV("VectorizationFactor", MaxSafeVF);
|
||||
});
|
||||
return MaxSafeVF;
|
||||
}
|
||||
|
||||
WidestRegister = std::min(WidestRegister, MaxSafeVectorWidthInBits);
|
||||
// Convenience function to return the minimum of two ElementCounts.
|
||||
auto MinVF = [](const ElementCount &LHS, const ElementCount &RHS) {
|
||||
assert((LHS.isScalable() == RHS.isScalable()) &&
|
||||
"Scalable flags must match");
|
||||
return ElementCount::isKnownLT(LHS, RHS) ? LHS : RHS;
|
||||
};
|
||||
|
||||
// Ensure MaxVF is a power of 2; the dependence distance bound may not be.
|
||||
// Note that neither WidestRegister nor WidestType is guaranteed to be a power of 2.
|
||||
auto MaxVectorSize =
|
||||
ElementCount::getFixed(PowerOf2Floor(WidestRegister / WidestType));
|
||||
|
||||
LLVM_DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType
|
||||
<< " / " << WidestType << " bits.\n");
|
||||
auto MaxVectorElementCount = ElementCount::get(
|
||||
PowerOf2Floor(WidestRegister.getKnownMinSize() / WidestType),
|
||||
ComputeScalableMaxVF);
|
||||
MaxVectorElementCount = MinVF(MaxVectorElementCount, MaxSafeVF);
|
||||
LLVM_DEBUG(dbgs() << "LV: The Widest register safe to use is: "
|
||||
<< WidestRegister << " bits.\n");
|
||||
<< (MaxVectorElementCount * WidestType) << " bits.\n");
|
||||
|
||||
assert(MaxVectorSize.getFixedValue() <= WidestRegister &&
|
||||
"Did not expect to pack so many elements"
|
||||
" into one vector!");
|
||||
if (MaxVectorSize.getFixedValue() == 0) {
|
||||
if (!MaxVectorElementCount) {
|
||||
LLVM_DEBUG(dbgs() << "LV: The target has no vector registers.\n");
|
||||
return ElementCount::getFixed(1);
|
||||
} else if (ConstTripCount && ConstTripCount < MaxVectorSize.getFixedValue() &&
|
||||
isPowerOf2_32(ConstTripCount)) {
|
||||
// We need to clamp the VF to be the ConstTripCount. There is no point in
|
||||
// choosing a higher viable VF as done in the loop below.
|
||||
LLVM_DEBUG(dbgs() << "LV: Clamping the MaxVF to the constant trip count: "
|
||||
<< ConstTripCount << "\n");
|
||||
return ElementCount::getFixed(ConstTripCount);
|
||||
}
|
||||
|
||||
ElementCount MaxVF = MaxVectorSize;
|
||||
const auto TripCountEC = ElementCount::getFixed(ConstTripCount);
|
||||
if (ConstTripCount &&
|
||||
ElementCount::isKnownLE(TripCountEC, MaxVectorElementCount) &&
|
||||
isPowerOf2_32(ConstTripCount)) {
|
||||
// We need to clamp the VF to be the ConstTripCount. There is no point in
|
||||
// choosing a higher viable VF as done in the loop below. If
|
||||
// MaxVectorElementCount is scalable, we only fall back on a fixed VF when
|
||||
// the TC is less than or equal to the known number of lanes.
|
||||
LLVM_DEBUG(dbgs() << "LV: Clamping the MaxVF to the constant trip count: "
|
||||
<< ConstTripCount << "\n");
|
||||
return TripCountEC;
|
||||
}
|
||||
|
||||
ElementCount MaxVF = MaxVectorElementCount;
|
||||
if (TTI.shouldMaximizeVectorBandwidth() ||
|
||||
(MaximizeBandwidth && isScalarEpilogueAllowed())) {
|
||||
auto MaxVectorElementCountMaxBW = ElementCount::get(
|
||||
PowerOf2Floor(WidestRegister.getKnownMinSize() / SmallestType),
|
||||
ComputeScalableMaxVF);
|
||||
MaxVectorElementCountMaxBW = MinVF(MaxVectorElementCountMaxBW, MaxSafeVF);
|
||||
|
||||
// Collect all viable vectorization factors larger than the default MaxVF
|
||||
// (i.e. MaxVectorSize).
|
||||
// (i.e. MaxVectorElementCount).
|
||||
SmallVector<ElementCount, 8> VFs;
|
||||
auto MaxVectorSizeMaxBW =
|
||||
ElementCount::getFixed(WidestRegister / SmallestType);
|
||||
for (ElementCount VS = MaxVectorSize * 2;
|
||||
ElementCount::isKnownLE(VS, MaxVectorSizeMaxBW); VS *= 2)
|
||||
for (ElementCount VS = MaxVectorElementCount * 2;
|
||||
ElementCount::isKnownLE(VS, MaxVectorElementCountMaxBW); VS *= 2)
|
||||
VFs.push_back(VS);
|
||||
|
||||
// For each VF calculate its register usage.
|
||||
|
@ -5885,7 +5948,7 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount,
|
|||
}
|
||||
}
|
||||
if (ElementCount MinVF =
|
||||
TTI.getMinimumVF(SmallestType, /*IsScalable=*/false)) {
|
||||
TTI.getMinimumVF(SmallestType, ComputeScalableMaxVF)) {
|
||||
if (ElementCount::isKnownLT(MaxVF, MinVF)) {
|
||||
LLVM_DEBUG(dbgs() << "LV: Overriding calculated MaxVF(" << MaxVF
|
||||
<< ") with target's minimum: " << MinVF << '\n');
|
||||
|
|
|
@ -221,7 +221,7 @@ for.end:
|
|||
ret float %add
|
||||
}
|
||||
|
||||
; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. Using fixed-width vectorization instead.
|
||||
; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
|
||||
; CHECK-REMARK: vectorized loop (vectorization width: 8, interleaved count: 2)
|
||||
define bfloat @fadd_fast_bfloat(bfloat* noalias nocapture readonly %a, i64 %n) {
|
||||
; CHECK-LABEL: @fadd_fast_bfloat
|
||||
|
@ -322,18 +322,18 @@ for.end:
|
|||
|
||||
; MUL
|
||||
|
||||
; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. Using fixed-width vectorization instead.
|
||||
; CHECK-REMARK: vectorized loop (vectorization width: 8, interleaved count: 2)
|
||||
; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
|
||||
; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 2)
|
||||
define i32 @mul(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
|
||||
; CHECK-LABEL: @mul
|
||||
; CHECK: vector.body:
|
||||
; CHECK: %[[LOAD1:.*]] = load <8 x i32>
|
||||
; CHECK: %[[LOAD2:.*]] = load <8 x i32>
|
||||
; CHECK: %[[MUL1:.*]] = mul <8 x i32> %[[LOAD1]]
|
||||
; CHECK: %[[MUL2:.*]] = mul <8 x i32> %[[LOAD2]]
|
||||
; CHECK: %[[LOAD1:.*]] = load <4 x i32>
|
||||
; CHECK: %[[LOAD2:.*]] = load <4 x i32>
|
||||
; CHECK: %[[MUL1:.*]] = mul <4 x i32> %[[LOAD1]]
|
||||
; CHECK: %[[MUL2:.*]] = mul <4 x i32> %[[LOAD2]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK: %[[RDX:.*]] = mul <8 x i32> %[[MUL2]], %[[MUL1]]
|
||||
; CHECK: call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %[[RDX]])
|
||||
; CHECK: %[[RDX:.*]] = mul <4 x i32> %[[MUL2]], %[[MUL1]]
|
||||
; CHECK: call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %[[RDX]])
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
|
@ -352,22 +352,22 @@ for.end: ; preds = %for.body, %entry
|
|||
}
|
||||
|
||||
; Note: This test was added to ensure we always check the legality of reductions (and emit a warning if necessary) before checking for memory dependencies.
|
||||
; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. Using fixed-width vectorization instead.
|
||||
; CHECK-REMARK: vectorized loop (vectorization width: 8, interleaved count: 2)
|
||||
; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
|
||||
; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 2)
|
||||
define i32 @memory_dependence(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) {
|
||||
; CHECK-LABEL: @memory_dependence
|
||||
; CHECK: vector.body:
|
||||
; CHECK: %[[LOAD1:.*]] = load <8 x i32>
|
||||
; CHECK: %[[LOAD2:.*]] = load <8 x i32>
|
||||
; CHECK: %[[LOAD3:.*]] = load <8 x i32>
|
||||
; CHECK: %[[LOAD4:.*]] = load <8 x i32>
|
||||
; CHECK: %[[ADD1:.*]] = add nsw <8 x i32> %[[LOAD3]], %[[LOAD1]]
|
||||
; CHECK: %[[ADD2:.*]] = add nsw <8 x i32> %[[LOAD4]], %[[LOAD2]]
|
||||
; CHECK: %[[MUL1:.*]] = mul <8 x i32> %[[LOAD3]]
|
||||
; CHECK: %[[MUL2:.*]] = mul <8 x i32> %[[LOAD4]]
|
||||
; CHECK: %[[LOAD1:.*]] = load <4 x i32>
|
||||
; CHECK: %[[LOAD2:.*]] = load <4 x i32>
|
||||
; CHECK: %[[LOAD3:.*]] = load <4 x i32>
|
||||
; CHECK: %[[LOAD4:.*]] = load <4 x i32>
|
||||
; CHECK: %[[ADD1:.*]] = add nsw <4 x i32> %[[LOAD3]], %[[LOAD1]]
|
||||
; CHECK: %[[ADD2:.*]] = add nsw <4 x i32> %[[LOAD4]], %[[LOAD2]]
|
||||
; CHECK: %[[MUL1:.*]] = mul <4 x i32> %[[LOAD3]]
|
||||
; CHECK: %[[MUL2:.*]] = mul <4 x i32> %[[LOAD4]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK: %[[RDX:.*]] = mul <8 x i32> %[[MUL2]], %[[MUL1]]
|
||||
; CHECK: call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %[[RDX]])
|
||||
; CHECK: %[[RDX:.*]] = mul <4 x i32> %[[MUL2]], %[[MUL1]]
|
||||
; CHECK: call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %[[RDX]])
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
|
|
|
@ -0,0 +1,149 @@
|
|||
; REQUIRES: asserts
|
||||
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
|
||||
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON_MAXBW
|
||||
|
||||
; Test that the MaxVF for the following loop, that has no dependence distances,
|
||||
; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16
|
||||
; (maximized bandwidth for i8 in the loop).
|
||||
define void @test0(i32* %a, i8* %b, i32* %c) {
|
||||
; CHECK: LV: Checking a loop in "test0"
|
||||
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 16
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %c, i64 %iv
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
|
||||
%1 = load i8, i8* %arrayidx2, align 4
|
||||
%zext = zext i8 %1 to i32
|
||||
%add = add nsw i32 %zext, %0
|
||||
%arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %iv
|
||||
store i32 %add, i32* %arrayidx5, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond.not = icmp eq i64 %iv.next, 1024
|
||||
br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test that the MaxVF for the following loop, with a dependence distance
|
||||
; of 64 elements, is calculated as (maxvscale = 16) * 4.
|
||||
define void @test1(i32* %a, i8* %b) {
|
||||
; CHECK: LV: Checking a loop in "test1"
|
||||
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 4
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
|
||||
%1 = load i8, i8* %arrayidx2, align 4
|
||||
%zext = zext i8 %1 to i32
|
||||
%add = add nsw i32 %zext, %0
|
||||
%2 = add nuw nsw i64 %iv, 64
|
||||
%arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
|
||||
store i32 %add, i32* %arrayidx5, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond.not = icmp eq i64 %iv.next, 1024
|
||||
br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test that the MaxVF for the following loop, with a dependence distance
|
||||
; of 32 elements, is calculated as (maxvscale = 16) * 2.
|
||||
define void @test2(i32* %a, i8* %b) {
|
||||
; CHECK: LV: Checking a loop in "test2"
|
||||
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 2
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
|
||||
%1 = load i8, i8* %arrayidx2, align 4
|
||||
%zext = zext i8 %1 to i32
|
||||
%add = add nsw i32 %zext, %0
|
||||
%2 = add nuw nsw i64 %iv, 32
|
||||
%arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
|
||||
store i32 %add, i32* %arrayidx5, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond.not = icmp eq i64 %iv.next, 1024
|
||||
br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test that the MaxVF for the following loop, with a dependence distance
|
||||
; of 16 elements, is calculated as (maxvscale = 16) * 1.
|
||||
define void @test3(i32* %a, i8* %b) {
|
||||
; CHECK: LV: Checking a loop in "test3"
|
||||
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
|
||||
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 1
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
|
||||
%1 = load i8, i8* %arrayidx2, align 4
|
||||
%zext = zext i8 %1 to i32
|
||||
%add = add nsw i32 %zext, %0
|
||||
%2 = add nuw nsw i64 %iv, 16
|
||||
%arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
|
||||
store i32 %add, i32* %arrayidx5, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond.not = icmp eq i64 %iv.next, 1024
|
||||
br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test the fallback mechanism when scalable vectors are not feasible due
|
||||
; to e.g. dependence distance. For the '-scalable-vectorization=exclusive'
|
||||
; it shouldn't try to vectorize with fixed-width vectors.
|
||||
define void @test4(i32* %a, i32* %b) {
|
||||
; CHECK: LV: Checking a loop in "test4"
|
||||
; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
|
||||
; CHECK_SCALABLE_ON_MAXBW-NOT: LV: Found feasible scalable VF
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
|
||||
%1 = load i32, i32* %arrayidx2, align 4
|
||||
%add = add nsw i32 %1, %0
|
||||
%2 = add nuw nsw i64 %iv, 8
|
||||
%arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
|
||||
store i32 %add, i32* %arrayidx5, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond.not = icmp eq i64 %iv.next, 1024
|
||||
br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !2
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
!0 = distinct !{!0, !1}
|
||||
!1 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
!2 = distinct !{!2, !3, !4}
|
||||
!3 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
!4 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
|
|
@ -37,9 +37,10 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
|||
; unless max(vscale)=2 it's unsafe to vectorize. For SVE max(vscale)=16, check
|
||||
; fixed-width vectorization is used instead.
|
||||
|
||||
; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead.
|
||||
; CHECK-DBG: remark: <unknown>:0:0: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead.
|
||||
; CHECK-DBG: LV: The max safe VF is: 8.
|
||||
; CHECK-DBG: LV: Checking a loop in "test1"
|
||||
; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible.
|
||||
; CHECK-DBG: remark: <unknown>:0:0: Max legal vector width too small, scalable vectorization unfeasible.
|
||||
; CHECK-DBG: LV: The max safe fixed VF is: 8.
|
||||
; CHECK-DBG: LV: Selecting VF: 4.
|
||||
; CHECK-LABEL: @test1
|
||||
; CHECK: <4 x i32>
|
||||
|
@ -80,9 +81,10 @@ exit:
|
|||
; }
|
||||
; }
|
||||
|
||||
; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead.
|
||||
; CHECK-DBG: LV: The max safe VF is: 4.
|
||||
; CHECK-DBG: LV: User VF=8 is unsafe, clamping to max safe VF=4.
|
||||
; CHECK-DBG: LV: Checking a loop in "test2"
|
||||
; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible.
|
||||
; CHECK-DBG: LV: The max safe fixed VF is: 4.
|
||||
; CHECK-DBG: LV: User VF=vscale x 8 is unsafe. Ignoring scalable UserVF.
|
||||
; CHECK-DBG: LV: Selecting VF: 4.
|
||||
; CHECK-LABEL: @test2
|
||||
; CHECK: <4 x i32>
|
||||
|
@ -129,7 +131,7 @@ exit:
|
|||
; Max fixed VF=32, Max scalable VF=2, safe to vectorize.
|
||||
|
||||
; CHECK-DBG-LABEL: LV: Checking a loop in "test3"
|
||||
; CHECK-DBG: LV: The max safe VF is: vscale x 2.
|
||||
; CHECK-DBG: LV: The max safe scalable VF is: vscale x 2.
|
||||
; CHECK-DBG: LV: Using user VF vscale x 2.
|
||||
; CHECK-LABEL: @test3
|
||||
; CHECK: <vscale x 2 x i32>
|
||||
|
@ -161,7 +163,8 @@ exit:
|
|||
|
||||
; test4
|
||||
;
|
||||
; Scalable vectorization feasible, but the VF is unsafe. Should clamp.
|
||||
; Scalable vectorization feasible, but the given VF is unsafe. Should ignore
|
||||
; the hint and leave it to the vectorizer to pick a more suitable VF.
|
||||
;
|
||||
; Specifies a vector of <vscale x 4 x i32>, i.e. maximum of 64 x i32 with 4
|
||||
; words per 128-bits (packed).
|
||||
|
@ -173,15 +176,16 @@ exit:
|
|||
; }
|
||||
; }
|
||||
;
|
||||
; Max fixed VF=32, Max scalable VF=2, unsafe to vectorize. Should clamp to 2.
|
||||
; Max fixed VF=32, Max scalable VF=2, unsafe to vectorize.
|
||||
|
||||
; CHECK-DBG-LABEL: LV: Checking a loop in "test4"
|
||||
; CHECK-DBG: LV: The max safe VF is: vscale x 2.
|
||||
; CHECK-DBG: LV: User VF=vscale x 4 is unsafe, clamping to max safe VF=vscale x 2.
|
||||
; CHECK-DBG: remark: <unknown>:0:0: User-specified vectorization factor vscale x 4 is unsafe, clamping to maximum safe vectorization factor vscale x 2
|
||||
; CHECK-DBG: LV: Using max VF vscale x 2
|
||||
; CHECK-DBG: LV: The max safe scalable VF is: vscale x 2.
|
||||
; CHECK-DBG: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF.
|
||||
; CHECK-DBG: remark: <unknown>:0:0: User-specified vectorization factor vscale x 4 is unsafe. Ignoring the hint to let the compiler pick a suitable VF.
|
||||
; CHECK-DBG: Found feasible scalable VF = vscale x 2
|
||||
; CHECK-DBG: LV: Selecting VF: 4.
|
||||
; CHECK-LABEL: @test4
|
||||
; CHECK: <vscale x 2 x i32>
|
||||
; CHECK: <4 x i32>
|
||||
define void @test4(i32* %a, i32* %b) {
|
||||
entry:
|
||||
br label %loop
|
||||
|
@ -225,7 +229,7 @@ exit:
|
|||
; Max fixed VF=128, Max scalable VF=8, safe to vectorize.
|
||||
|
||||
; CHECK-DBG-LABEL: LV: Checking a loop in "test5"
|
||||
; CHECK-DBG: LV: The max safe VF is: vscale x 8.
|
||||
; CHECK-DBG: LV: The max safe scalable VF is: vscale x 8.
|
||||
; CHECK-DBG: LV: Using user VF vscale x 4
|
||||
; CHECK-LABEL: @test5
|
||||
; CHECK: <vscale x 4 x i32>
|
||||
|
@ -257,7 +261,8 @@ exit:
|
|||
|
||||
; test6
|
||||
;
|
||||
; Scalable vectorization feasible, but the VF is unsafe. Should clamp.
|
||||
; Scalable vectorization feasible, but the VF is unsafe. Should ignore
|
||||
; the hint and leave it to the vectorizer to pick a more suitable VF.
|
||||
;
|
||||
; Specifies a vector of <vscale x 16 x i32>, i.e. maximum of 256 x i32.
|
||||
;
|
||||
|
@ -268,15 +273,16 @@ exit:
|
|||
; }
|
||||
; }
|
||||
;
|
||||
; Max fixed VF=128, Max scalable VF=8, unsafe to vectorize. Should clamp to 8.
|
||||
; Max fixed VF=128, Max scalable VF=8, unsafe to vectorize.
|
||||
|
||||
; CHECK-DBG-LABEL: LV: Checking a loop in "test6"
|
||||
; CHECK-DBG: LV: The max safe VF is: vscale x 8.
|
||||
; CHECK-DBG: LV: User VF=vscale x 16 is unsafe, clamping to max safe VF=vscale x 8.
|
||||
; CHECK-DBG: remark: <unknown>:0:0: User-specified vectorization factor vscale x 16 is unsafe, clamping to maximum safe vectorization factor vscale x 8
|
||||
; CHECK-DBG: LV: Using max VF vscale x 8
|
||||
; CHECK-DBG: LV: The max safe scalable VF is: vscale x 8.
|
||||
; CHECK-DBG: LV: User VF=vscale x 16 is unsafe. Ignoring scalable UserVF.
|
||||
; CHECK-DBG: remark: <unknown>:0:0: User-specified vectorization factor vscale x 16 is unsafe. Ignoring the hint to let the compiler pick a suitable VF.
|
||||
; CHECK-DBG: LV: Found feasible scalable VF = vscale x 4
|
||||
; CHECK-DBG: Selecting VF: 4.
|
||||
; CHECK-LABEL: @test6
|
||||
; CHECK: <vscale x 8 x i32>
|
||||
; CHECK: <4 x i32>
|
||||
define void @test6(i32* %a, i32* %b) {
|
||||
entry:
|
||||
br label %loop
|
||||
|
@ -304,8 +310,9 @@ exit:
|
|||
!17 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
|
||||
|
||||
; CHECK-NO-SVE-LABEL: LV: Checking a loop in "test_no_sve"
|
||||
; CHECK-NO-SVE: LV: Ignoring VF=vscale x 4 because target does not support scalable vectors.
|
||||
; CHECK-NO-SVE: remark: <unknown>:0:0: Ignoring VF=vscale x 4 because target does not support scalable vectors.
|
||||
; CHECK-NO-SVE: LV: Disabling scalable vectorization, because target does not support scalable vectors.
|
||||
; CHECK-NO-SVE: remark: <unknown>:0:0: Disabling scalable vectorization, because target does not support scalable vectors.
|
||||
; CHECK-NO-SVE: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF.
|
||||
; CHECK-NO-SVE: LV: Selecting VF: 4.
|
||||
; CHECK-NO-SVE: <4 x i32>
|
||||
; CHECK-NO-SVE-NOT: <vscale x 4 x i32>
|
||||
|
@ -337,8 +344,8 @@ exit:
|
|||
; supported but max vscale is undefined.
|
||||
;
|
||||
; CHECK-NO-MAX-VSCALE-LABEL: LV: Checking a loop in "test_no_max_vscale"
|
||||
; CHECK-NO-MAX-VSCALE: LV: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead.
|
||||
; CHECK-NO-MAX-VSCALE: The max safe VF is: 4.
|
||||
; CHECK-NO-MAX-VSCALE: The max safe fixed VF is: 4.
|
||||
; CHECK-NO-MAX-VSCALE: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF.
|
||||
; CHECK-NO-MAX-VSCALE: LV: Selecting VF: 4.
|
||||
; CHECK-NO-MAX-VSCALE: <4 x i32>
|
||||
define void @test_no_max_vscale(i32* %a, i32* %b) {
|
||||
|
|
|
@ -3,8 +3,8 @@
|
|||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
|
||||
; CHECK: LV: Ignoring VF=vscale x 4 because target does not support scalable vectors.
|
||||
; CHECK: remark: <unknown>:0:0: Ignoring VF=vscale x 4 because target does not support scalable vectors.
|
||||
; CHECK: LV: Disabling scalable vectorization, because target does not support scalable vectors.
|
||||
; CHECK: remark: <unknown>:0:0: Disabling scalable vectorization, because target does not support scalable vectors.
|
||||
; CHECK: LV: The Widest register safe to use is: 32 bits.
|
||||
define void @test1(i32* %a, i32* %b) {
|
||||
entry:
|
||||
|
|
Loading…
Reference in New Issue