forked from OSchip/llvm-project
[LV] Apply sink-after & interleave-groups as VPlan transformations (NFCI)
This recommits 100e797adb (reverted in 009e032634 for failing an assert).
While the root cause was independently reverted in eaff300401, this commit
includes a LIT test to make sure IVDescriptor's SinkAfter logic does not try
to sink branch instructions.
This commit is contained in:
parent
ef459dedca
commit
11ed1c0239
|
@ -542,13 +542,10 @@ public:
|
||||||
/// formation for predicated accesses, we may be able to relax this limitation
|
/// formation for predicated accesses, we may be able to relax this limitation
|
||||||
/// in the future once we handle more complicated blocks.
|
/// in the future once we handle more complicated blocks.
|
||||||
void reset() {
|
void reset() {
|
||||||
SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
|
|
||||||
// Avoid releasing a pointer twice.
|
|
||||||
for (auto &I : InterleaveGroupMap)
|
|
||||||
DelSet.insert(I.second);
|
|
||||||
for (auto *Ptr : DelSet)
|
|
||||||
delete Ptr;
|
|
||||||
InterleaveGroupMap.clear();
|
InterleaveGroupMap.clear();
|
||||||
|
for (auto *Ptr : InterleaveGroups)
|
||||||
|
delete Ptr;
|
||||||
|
InterleaveGroups.clear();
|
||||||
RequiresScalarEpilogue = false;
|
RequiresScalarEpilogue = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -201,6 +201,9 @@ class LoopVectorizationPlanner {
|
||||||
/// The profitability analysis.
|
/// The profitability analysis.
|
||||||
LoopVectorizationCostModel &CM;
|
LoopVectorizationCostModel &CM;
|
||||||
|
|
||||||
|
/// The interleaved access analysis.
|
||||||
|
InterleavedAccessInfo &IAI;
|
||||||
|
|
||||||
SmallVector<VPlanPtr, 4> VPlans;
|
SmallVector<VPlanPtr, 4> VPlans;
|
||||||
|
|
||||||
/// This class is used to enable the VPlan to invoke a method of ILV. This is
|
/// This class is used to enable the VPlan to invoke a method of ILV. This is
|
||||||
|
@ -223,8 +226,10 @@ public:
|
||||||
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
|
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
|
||||||
const TargetTransformInfo *TTI,
|
const TargetTransformInfo *TTI,
|
||||||
LoopVectorizationLegality *Legal,
|
LoopVectorizationLegality *Legal,
|
||||||
LoopVectorizationCostModel &CM)
|
LoopVectorizationCostModel &CM,
|
||||||
: OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {}
|
InterleavedAccessInfo &IAI)
|
||||||
|
: OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
|
||||||
|
IAI(IAI) {}
|
||||||
|
|
||||||
/// Plan how to best vectorize, return the best VF and its cost, or None if
|
/// Plan how to best vectorize, return the best VF and its cost, or None if
|
||||||
/// vectorization and interleaving should be avoided up front.
|
/// vectorization and interleaving should be avoided up front.
|
||||||
|
|
|
@ -6710,37 +6710,6 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
|
||||||
return BlockMaskCache[BB] = BlockMask;
|
return BlockMaskCache[BB] = BlockMask;
|
||||||
}
|
}
|
||||||
|
|
||||||
VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I,
|
|
||||||
VFRange &Range,
|
|
||||||
VPlanPtr &Plan) {
|
|
||||||
const InterleaveGroup<Instruction> *IG = CM.getInterleavedAccessGroup(I);
|
|
||||||
if (!IG)
|
|
||||||
return nullptr;
|
|
||||||
|
|
||||||
// Now check if IG is relevant for VF's in the given range.
|
|
||||||
auto isIGMember = [&](Instruction *I) -> std::function<bool(unsigned)> {
|
|
||||||
return [=](unsigned VF) -> bool {
|
|
||||||
return (VF >= 2 && // Query is illegal for VF == 1
|
|
||||||
CM.getWideningDecision(I, VF) ==
|
|
||||||
LoopVectorizationCostModel::CM_Interleave);
|
|
||||||
};
|
|
||||||
};
|
|
||||||
if (!LoopVectorizationPlanner::getDecisionAndClampRange(isIGMember(I), Range))
|
|
||||||
return nullptr;
|
|
||||||
|
|
||||||
// I is a member of an InterleaveGroup for VF's in the (possibly trimmed)
|
|
||||||
// range. If it's the primary member of the IG construct a VPInterleaveRecipe.
|
|
||||||
// Otherwise, it's an adjunct member of the IG, do not construct any Recipe.
|
|
||||||
assert(I == IG->getInsertPos() &&
|
|
||||||
"Generating a recipe for an adjunct member of an interleave group");
|
|
||||||
|
|
||||||
VPValue *Mask = nullptr;
|
|
||||||
if (Legal->isMaskRequired(I))
|
|
||||||
Mask = createBlockInMask(I->getParent(), Plan);
|
|
||||||
|
|
||||||
return new VPInterleaveRecipe(IG, Mask);
|
|
||||||
}
|
|
||||||
|
|
||||||
VPWidenMemoryInstructionRecipe *
|
VPWidenMemoryInstructionRecipe *
|
||||||
VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
|
VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
|
||||||
VPlanPtr &Plan) {
|
VPlanPtr &Plan) {
|
||||||
|
@ -6757,8 +6726,6 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
|
||||||
CM.getWideningDecision(I, VF);
|
CM.getWideningDecision(I, VF);
|
||||||
assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
|
assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
|
||||||
"CM decision should be taken at this point.");
|
"CM decision should be taken at this point.");
|
||||||
assert(Decision != LoopVectorizationCostModel::CM_Interleave &&
|
|
||||||
"Interleave memory opportunity should be caught earlier.");
|
|
||||||
return Decision != LoopVectorizationCostModel::CM_Scalarize;
|
return Decision != LoopVectorizationCostModel::CM_Scalarize;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -6923,15 +6890,21 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
|
||||||
if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range))
|
if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
// If this ingredient's recipe is to be recorded, keep its recipe a singleton
|
||||||
|
// to avoid having to split recipes later.
|
||||||
|
bool IsSingleton = Ingredient2Recipe.count(I);
|
||||||
|
|
||||||
// Success: widen this instruction. We optimize the common case where
|
// Success: widen this instruction. We optimize the common case where
|
||||||
// consecutive instructions can be represented by a single recipe.
|
// consecutive instructions can be represented by a single recipe.
|
||||||
if (!VPBB->empty()) {
|
if (!IsSingleton && !VPBB->empty() && LastExtensibleRecipe == &VPBB->back() &&
|
||||||
VPWidenRecipe *LastWidenRecipe = dyn_cast<VPWidenRecipe>(&VPBB->back());
|
LastExtensibleRecipe->appendInstruction(I))
|
||||||
if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I))
|
return true;
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
VPBB->appendRecipe(new VPWidenRecipe(I));
|
VPWidenRecipe *WidenRecipe = new VPWidenRecipe(I);
|
||||||
|
if (!IsSingleton)
|
||||||
|
LastExtensibleRecipe = WidenRecipe;
|
||||||
|
setRecipe(I, WidenRecipe);
|
||||||
|
VPBB->appendRecipe(WidenRecipe);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6947,6 +6920,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
|
||||||
[&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);
|
[&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);
|
||||||
|
|
||||||
auto *Recipe = new VPReplicateRecipe(I, IsUniform, IsPredicated);
|
auto *Recipe = new VPReplicateRecipe(I, IsUniform, IsPredicated);
|
||||||
|
setRecipe(I, Recipe);
|
||||||
|
|
||||||
// Find if I uses a predicated instruction. If so, it will use its scalar
|
// Find if I uses a predicated instruction. If so, it will use its scalar
|
||||||
// value. Avoid hoisting the insert-element which packs the scalar value into
|
// value. Avoid hoisting the insert-element which packs the scalar value into
|
||||||
|
@ -7005,36 +6979,20 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr,
|
||||||
bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
|
bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
|
||||||
VPlanPtr &Plan, VPBasicBlock *VPBB) {
|
VPlanPtr &Plan, VPBasicBlock *VPBB) {
|
||||||
VPRecipeBase *Recipe = nullptr;
|
VPRecipeBase *Recipe = nullptr;
|
||||||
// Check if Instr should belong to an interleave memory recipe, or already
|
|
||||||
// does. In the latter case Instr is irrelevant.
|
// First, check for specific widening recipes that deal with memory
|
||||||
if ((Recipe = tryToInterleaveMemory(Instr, Range, Plan))) {
|
// operations, inductions and Phi nodes.
|
||||||
|
if ((Recipe = tryToWidenMemory(Instr, Range, Plan)) ||
|
||||||
|
(Recipe = tryToOptimizeInduction(Instr, Range)) ||
|
||||||
|
(Recipe = tryToBlend(Instr, Plan)) ||
|
||||||
|
(isa<PHINode>(Instr) &&
|
||||||
|
(Recipe = new VPWidenPHIRecipe(cast<PHINode>(Instr))))) {
|
||||||
|
setRecipe(Instr, Recipe);
|
||||||
VPBB->appendRecipe(Recipe);
|
VPBB->appendRecipe(Recipe);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if Instr is a memory operation that should be widened.
|
// Check if Instr is to be widened by a general VPWidenRecipe.
|
||||||
if ((Recipe = tryToWidenMemory(Instr, Range, Plan))) {
|
|
||||||
VPBB->appendRecipe(Recipe);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if Instr should form some PHI recipe.
|
|
||||||
if ((Recipe = tryToOptimizeInduction(Instr, Range))) {
|
|
||||||
VPBB->appendRecipe(Recipe);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
if ((Recipe = tryToBlend(Instr, Plan))) {
|
|
||||||
VPBB->appendRecipe(Recipe);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
if (PHINode *Phi = dyn_cast<PHINode>(Instr)) {
|
|
||||||
VPBB->appendRecipe(new VPWidenPHIRecipe(Phi));
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if Instr is to be widened by a general VPWidenRecipe, after
|
|
||||||
// having first checked for specific widening recipes that deal with
|
|
||||||
// Interleave Groups, Inductions and Phi nodes.
|
|
||||||
if (tryToWiden(Instr, VPBB, Range))
|
if (tryToWiden(Instr, VPBB, Range))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
@ -7090,19 +7048,57 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
|
||||||
VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
|
VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
|
||||||
VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
|
VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
|
||||||
SmallPtrSetImpl<Instruction *> &DeadInstructions) {
|
SmallPtrSetImpl<Instruction *> &DeadInstructions) {
|
||||||
|
|
||||||
// Hold a mapping from predicated instructions to their recipes, in order to
|
// Hold a mapping from predicated instructions to their recipes, in order to
|
||||||
// fix their AlsoPack behavior if a user is determined to replicate and use a
|
// fix their AlsoPack behavior if a user is determined to replicate and use a
|
||||||
// scalar instead of vector value.
|
// scalar instead of vector value.
|
||||||
DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe;
|
DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe;
|
||||||
|
|
||||||
DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter();
|
DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter();
|
||||||
DenseMap<Instruction *, Instruction *> SinkAfterInverse;
|
|
||||||
|
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
|
||||||
|
|
||||||
|
VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Pre-construction: record ingredients whose recipes we'll need to further
|
||||||
|
// process after constructing the initial VPlan.
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Mark instructions we'll need to sink later and their targets as
|
||||||
|
// ingredients whose recipe we'll need to record.
|
||||||
|
for (auto &Entry : SinkAfter) {
|
||||||
|
RecipeBuilder.recordRecipeOf(Entry.first);
|
||||||
|
RecipeBuilder.recordRecipeOf(Entry.second);
|
||||||
|
}
|
||||||
|
|
||||||
|
// For each interleave group which is relevant for this (possibly trimmed)
|
||||||
|
// Range, add it to the set of groups to be later applied to the VPlan and add
|
||||||
|
// placeholders for its members' Recipes which we'll be replacing with a
|
||||||
|
// single VPInterleaveRecipe.
|
||||||
|
for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) {
|
||||||
|
auto applyIG = [IG, this](unsigned VF) -> bool {
|
||||||
|
return (VF >= 2 && // Query is illegal for VF == 1
|
||||||
|
CM.getWideningDecision(IG->getInsertPos(), VF) ==
|
||||||
|
LoopVectorizationCostModel::CM_Interleave);
|
||||||
|
};
|
||||||
|
if (!getDecisionAndClampRange(applyIG, Range))
|
||||||
|
continue;
|
||||||
|
InterleaveGroups.insert(IG);
|
||||||
|
for (unsigned i = 0; i < IG->getFactor(); i++)
|
||||||
|
if (Instruction *Member = IG->getMember(i))
|
||||||
|
RecipeBuilder.recordRecipeOf(Member);
|
||||||
|
};
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Build initial VPlan: Scan the body of the loop in a topological order to
|
||||||
|
// visit each basic block after having visited its predecessor basic blocks.
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
// Create a dummy pre-entry VPBasicBlock to start building the VPlan.
|
// Create a dummy pre-entry VPBasicBlock to start building the VPlan.
|
||||||
VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
|
VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
|
||||||
auto Plan = std::make_unique<VPlan>(VPBB);
|
auto Plan = std::make_unique<VPlan>(VPBB);
|
||||||
|
|
||||||
VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
|
|
||||||
// Represent values that will have defs inside VPlan.
|
// Represent values that will have defs inside VPlan.
|
||||||
for (Value *V : NeedDef)
|
for (Value *V : NeedDef)
|
||||||
Plan->addVPValue(V);
|
Plan->addVPValue(V);
|
||||||
|
@ -7123,8 +7119,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
|
||||||
|
|
||||||
std::vector<Instruction *> Ingredients;
|
std::vector<Instruction *> Ingredients;
|
||||||
|
|
||||||
// Organize the ingredients to vectorize from current basic block in the
|
// Introduce each ingredient into VPlan.
|
||||||
// right order.
|
|
||||||
for (Instruction &I : BB->instructionsWithoutDebug()) {
|
for (Instruction &I : BB->instructionsWithoutDebug()) {
|
||||||
Instruction *Instr = &I;
|
Instruction *Instr = &I;
|
||||||
|
|
||||||
|
@ -7134,43 +7129,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
|
||||||
DeadInstructions.find(Instr) != DeadInstructions.end())
|
DeadInstructions.find(Instr) != DeadInstructions.end())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// I is a member of an InterleaveGroup for Range.Start. If it's an adjunct
|
|
||||||
// member of the IG, do not construct any Recipe for it.
|
|
||||||
const InterleaveGroup<Instruction> *IG =
|
|
||||||
CM.getInterleavedAccessGroup(Instr);
|
|
||||||
if (IG && Instr != IG->getInsertPos() &&
|
|
||||||
Range.Start >= 2 && // Query is illegal for VF == 1
|
|
||||||
CM.getWideningDecision(Instr, Range.Start) ==
|
|
||||||
LoopVectorizationCostModel::CM_Interleave) {
|
|
||||||
auto SinkCandidate = SinkAfterInverse.find(Instr);
|
|
||||||
if (SinkCandidate != SinkAfterInverse.end())
|
|
||||||
Ingredients.push_back(SinkCandidate->second);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Move instructions to handle first-order recurrences, step 1: avoid
|
|
||||||
// handling this instruction until after we've handled the instruction it
|
|
||||||
// should follow.
|
|
||||||
auto SAIt = SinkAfter.find(Instr);
|
|
||||||
if (SAIt != SinkAfter.end()) {
|
|
||||||
LLVM_DEBUG(dbgs() << "Sinking" << *SAIt->first << " after"
|
|
||||||
<< *SAIt->second
|
|
||||||
<< " to vectorize a 1st order recurrence.\n");
|
|
||||||
SinkAfterInverse[SAIt->second] = Instr;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ingredients.push_back(Instr);
|
|
||||||
|
|
||||||
// Move instructions to handle first-order recurrences, step 2: push the
|
|
||||||
// instruction to be sunk at its insertion point.
|
|
||||||
auto SAInvIt = SinkAfterInverse.find(Instr);
|
|
||||||
if (SAInvIt != SinkAfterInverse.end())
|
|
||||||
Ingredients.push_back(SAInvIt->second);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Introduce each ingredient into VPlan.
|
|
||||||
for (Instruction *Instr : Ingredients) {
|
|
||||||
if (RecipeBuilder.tryToCreateRecipe(Instr, Range, Plan, VPBB))
|
if (RecipeBuilder.tryToCreateRecipe(Instr, Range, Plan, VPBB))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
@ -7195,6 +7153,32 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
|
||||||
VPBlockUtils::disconnectBlocks(PreEntry, Entry);
|
VPBlockUtils::disconnectBlocks(PreEntry, Entry);
|
||||||
delete PreEntry;
|
delete PreEntry;
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Transform initial VPlan: Apply previously taken decisions, in order, to
|
||||||
|
// bring the VPlan to its final state.
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Apply Sink-After legal constraints.
|
||||||
|
for (auto &Entry : SinkAfter) {
|
||||||
|
VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first);
|
||||||
|
VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second);
|
||||||
|
Sink->moveAfter(Target);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Interleave memory: for each Interleave Group we marked earlier as relevant
|
||||||
|
// for this VPlan, replace the Recipes widening its memory instructions with a
|
||||||
|
// single VPInterleaveRecipe at its insertion point.
|
||||||
|
for (auto IG : InterleaveGroups) {
|
||||||
|
auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
|
||||||
|
RecipeBuilder.getRecipe(IG->getInsertPos()));
|
||||||
|
(new VPInterleaveRecipe(IG, Recipe->getMask()))->insertBefore(Recipe);
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < IG->getFactor(); ++i)
|
||||||
|
if (Instruction *Member = IG->getMember(i)) {
|
||||||
|
RecipeBuilder.getRecipe(Member)->eraseFromParent();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Finally, if tail is folded by masking, introduce selects between the phi
|
// Finally, if tail is folded by masking, introduce selects between the phi
|
||||||
// and the live-out instruction of each reduction, at the end of the latch.
|
// and the live-out instruction of each reduction, at the end of the latch.
|
||||||
if (CM.foldTailByMasking()) {
|
if (CM.foldTailByMasking()) {
|
||||||
|
@ -7427,12 +7411,11 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
|
void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
|
||||||
if (!User)
|
VPValue *Mask = getMask();
|
||||||
|
if (!Mask)
|
||||||
return State.ILV->vectorizeMemoryInstruction(&Instr);
|
return State.ILV->vectorizeMemoryInstruction(&Instr);
|
||||||
|
|
||||||
// Last (and currently only) operand is a mask.
|
|
||||||
InnerLoopVectorizer::VectorParts MaskValues(State.UF);
|
InnerLoopVectorizer::VectorParts MaskValues(State.UF);
|
||||||
VPValue *Mask = User->getOperand(User->getNumOperands() - 1);
|
|
||||||
for (unsigned Part = 0; Part < State.UF; ++Part)
|
for (unsigned Part = 0; Part < State.UF; ++Part)
|
||||||
MaskValues[Part] = State.get(Mask, Part);
|
MaskValues[Part] = State.get(Mask, Part);
|
||||||
State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues);
|
State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues);
|
||||||
|
@ -7481,7 +7464,7 @@ static bool processLoopInVPlanNativePath(
|
||||||
// Use the planner for outer loop vectorization.
|
// Use the planner for outer loop vectorization.
|
||||||
// TODO: CM is not used at this point inside the planner. Turn CM into an
|
// TODO: CM is not used at this point inside the planner. Turn CM into an
|
||||||
// optional argument if we don't need it in the future.
|
// optional argument if we don't need it in the future.
|
||||||
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM);
|
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI);
|
||||||
|
|
||||||
// Get user vectorization factor.
|
// Get user vectorization factor.
|
||||||
const unsigned UserVF = Hints.getWidth();
|
const unsigned UserVF = Hints.getWidth();
|
||||||
|
@ -7641,7 +7624,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
|
||||||
CM.collectValuesToIgnore();
|
CM.collectValuesToIgnore();
|
||||||
|
|
||||||
// Use the planner for vectorization.
|
// Use the planner for vectorization.
|
||||||
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM);
|
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI);
|
||||||
|
|
||||||
// Get user vectorization factor.
|
// Get user vectorization factor.
|
||||||
unsigned UserVF = Hints.getWidth();
|
unsigned UserVF = Hints.getWidth();
|
||||||
|
|
|
@ -47,6 +47,24 @@ class VPRecipeBuilder {
|
||||||
EdgeMaskCacheTy EdgeMaskCache;
|
EdgeMaskCacheTy EdgeMaskCache;
|
||||||
BlockMaskCacheTy BlockMaskCache;
|
BlockMaskCacheTy BlockMaskCache;
|
||||||
|
|
||||||
|
// VPlan-VPlan transformations support: Hold a mapping from ingredients to
|
||||||
|
// their recipe. To save on memory, only do so for selected ingredients,
|
||||||
|
// marked by having a nullptr entry in this map. If those ingredients get a
|
||||||
|
// VPWidenRecipe, also avoid compressing other ingredients into it to avoid
|
||||||
|
// having to split such recipes later.
|
||||||
|
DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe;
|
||||||
|
VPWidenRecipe *LastExtensibleRecipe = nullptr;
|
||||||
|
|
||||||
|
/// Set the recipe created for given ingredient. This operation is a no-op for
|
||||||
|
/// ingredients that were not marked using a nullptr entry in the map.
|
||||||
|
void setRecipe(Instruction *I, VPRecipeBase *R) {
|
||||||
|
if (!Ingredient2Recipe.count(I))
|
||||||
|
return;
|
||||||
|
assert(Ingredient2Recipe[I] == nullptr &&
|
||||||
|
"Recipe already set for ingredient");
|
||||||
|
Ingredient2Recipe[I] = R;
|
||||||
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/// A helper function that computes the predicate of the block BB, assuming
|
/// A helper function that computes the predicate of the block BB, assuming
|
||||||
/// that the header block of the loop is set to True. It returns the *entry*
|
/// that the header block of the loop is set to True. It returns the *entry*
|
||||||
|
@ -57,16 +75,22 @@ public:
|
||||||
/// and DST.
|
/// and DST.
|
||||||
VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan);
|
VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan);
|
||||||
|
|
||||||
/// Check if \I belongs to an Interleave Group within the given VF \p Range,
|
/// Mark given ingredient for recording its recipe once one is created for
|
||||||
/// \return true in the first returned value if so and false otherwise.
|
/// it.
|
||||||
/// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG
|
void recordRecipeOf(Instruction *I) {
|
||||||
/// for \p Range.Start, and provide it as the second returned value.
|
assert((!Ingredient2Recipe.count(I) || Ingredient2Recipe[I] == nullptr) &&
|
||||||
/// Note that if \I is an adjunct member of an IG for \p Range.Start, the
|
"Recipe already set for ingredient");
|
||||||
/// \return value is <true, nullptr>, as it is handled by another recipe.
|
Ingredient2Recipe[I] = nullptr;
|
||||||
/// \p Range.End may be decreased to ensure same decision from \p Range.Start
|
}
|
||||||
/// to \p Range.End.
|
|
||||||
VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range,
|
/// Return the recipe created for given ingredient.
|
||||||
VPlanPtr &Plan);
|
VPRecipeBase *getRecipe(Instruction *I) {
|
||||||
|
assert(Ingredient2Recipe.count(I) &&
|
||||||
|
"Recording this ingredients recipe was not requested");
|
||||||
|
assert(Ingredient2Recipe[I] != nullptr &&
|
||||||
|
"Ingredient doesn't have a recipe");
|
||||||
|
return Ingredient2Recipe[I];
|
||||||
|
}
|
||||||
|
|
||||||
/// Check if \I is a memory instruction to be widened for \p Range.Start and
|
/// Check if \I is a memory instruction to be widened for \p Range.Start and
|
||||||
/// potentially masked. Such instructions are handled by a recipe that takes
|
/// potentially masked. Such instructions are handled by a recipe that takes
|
||||||
|
|
|
@ -275,18 +275,35 @@ void VPRegionBlock::execute(VPTransformState *State) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
|
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
|
||||||
|
assert(!Parent && "Recipe already in some VPBasicBlock");
|
||||||
|
assert(InsertPos->getParent() &&
|
||||||
|
"Insertion position not in any VPBasicBlock");
|
||||||
Parent = InsertPos->getParent();
|
Parent = InsertPos->getParent();
|
||||||
Parent->getRecipeList().insert(InsertPos->getIterator(), this);
|
Parent->getRecipeList().insert(InsertPos->getIterator(), this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
|
||||||
|
assert(!Parent && "Recipe already in some VPBasicBlock");
|
||||||
|
assert(InsertPos->getParent() &&
|
||||||
|
"Insertion position not in any VPBasicBlock");
|
||||||
|
Parent = InsertPos->getParent();
|
||||||
|
Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
|
||||||
|
}
|
||||||
|
|
||||||
|
void VPRecipeBase::removeFromParent() {
|
||||||
|
assert(getParent() && "Recipe not in any VPBasicBlock");
|
||||||
|
getParent()->getRecipeList().remove(getIterator());
|
||||||
|
Parent = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
|
iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
|
||||||
|
assert(getParent() && "Recipe not in any VPBasicBlock");
|
||||||
return getParent()->getRecipeList().erase(getIterator());
|
return getParent()->getRecipeList().erase(getIterator());
|
||||||
}
|
}
|
||||||
|
|
||||||
void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
|
void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
|
||||||
InsertPos->getParent()->getRecipeList().splice(
|
removeFromParent();
|
||||||
std::next(InsertPos->getIterator()), getParent()->getRecipeList(),
|
insertAfter(InsertPos);
|
||||||
getIterator());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void VPInstruction::generateInstruction(VPTransformState &State,
|
void VPInstruction::generateInstruction(VPTransformState &State,
|
||||||
|
|
|
@ -567,6 +567,7 @@ public:
|
||||||
/// instructions.
|
/// instructions.
|
||||||
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> {
|
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> {
|
||||||
friend VPBasicBlock;
|
friend VPBasicBlock;
|
||||||
|
friend class VPBlockUtils;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
|
const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
|
||||||
|
@ -615,10 +616,18 @@ public:
|
||||||
/// the specified recipe.
|
/// the specified recipe.
|
||||||
void insertBefore(VPRecipeBase *InsertPos);
|
void insertBefore(VPRecipeBase *InsertPos);
|
||||||
|
|
||||||
|
/// Insert an unlinked Recipe into a basic block immediately after
|
||||||
|
/// the specified Recipe.
|
||||||
|
void insertAfter(VPRecipeBase *InsertPos);
|
||||||
|
|
||||||
/// Unlink this recipe from its current VPBasicBlock and insert it into
|
/// Unlink this recipe from its current VPBasicBlock and insert it into
|
||||||
/// the VPBasicBlock that MovePos lives in, right after MovePos.
|
/// the VPBasicBlock that MovePos lives in, right after MovePos.
|
||||||
void moveAfter(VPRecipeBase *MovePos);
|
void moveAfter(VPRecipeBase *MovePos);
|
||||||
|
|
||||||
|
/// This method unlinks 'this' from the containing basic block, but does not
|
||||||
|
/// delete it.
|
||||||
|
void removeFromParent();
|
||||||
|
|
||||||
/// This method unlinks 'this' from the containing basic block and deletes it.
|
/// This method unlinks 'this' from the containing basic block and deletes it.
|
||||||
///
|
///
|
||||||
/// \returns an iterator pointing to the element after the erased one
|
/// \returns an iterator pointing to the element after the erased one
|
||||||
|
@ -973,6 +982,13 @@ public:
|
||||||
return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC;
|
return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return the mask used by this recipe. Note that a full mask is represented
|
||||||
|
/// by a nullptr.
|
||||||
|
VPValue *getMask() {
|
||||||
|
// Mask is the last operand.
|
||||||
|
return User ? User->getOperand(User->getNumOperands() - 1) : nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
/// Generate the wide load/store.
|
/// Generate the wide load/store.
|
||||||
void execute(VPTransformState &State) override;
|
void execute(VPTransformState &State) override;
|
||||||
|
|
||||||
|
|
|
@ -572,3 +572,38 @@ for.body:
|
||||||
for.end:
|
for.end:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Do not sink branches: While branches are if-converted and do not require
|
||||||
|
; sinking, instructions with side effects (e.g. loads) conditioned by those
|
||||||
|
; branches will become users of the condition bit after vectorization and would
|
||||||
|
; need to be sunk if the loop is vectorized.
|
||||||
|
define void @do_not_sink_branch(i32 %x, i32* %in, i32* %out, i32 %tc) local_unnamed_addr #0 {
|
||||||
|
; NO-SINK-AFTER-LABEL: do_not_sink_branch
|
||||||
|
; NO-SINK-AFTER-NOT: vector.ph:
|
||||||
|
; NO-SINK-AFTER: }
|
||||||
|
entry:
|
||||||
|
%cmp530 = icmp slt i32 0, %tc
|
||||||
|
br label %for.body4
|
||||||
|
|
||||||
|
for.body4: ; preds = %cond.end, %entry
|
||||||
|
%indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %cond.end ]
|
||||||
|
%cmp534 = phi i1 [ %cmp530, %entry ], [ %cmp5, %cond.end ]
|
||||||
|
br i1 %cmp534, label %cond.true, label %cond.end
|
||||||
|
|
||||||
|
cond.true: ; preds = %for.body4
|
||||||
|
%arrayidx7 = getelementptr inbounds i32, i32* %in, i32 %indvars.iv
|
||||||
|
%in.val = load i32, i32* %arrayidx7, align 4
|
||||||
|
br label %cond.end
|
||||||
|
|
||||||
|
cond.end: ; preds = %for.body4, %cond.true
|
||||||
|
%cond = phi i32 [ %in.val, %cond.true ], [ 0, %for.body4 ]
|
||||||
|
%arrayidx8 = getelementptr inbounds i32, i32* %out, i32 %indvars.iv
|
||||||
|
store i32 %cond, i32* %arrayidx8, align 4
|
||||||
|
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
|
||||||
|
%cmp5 = icmp slt i32 %indvars.iv.next, %tc
|
||||||
|
%exitcond = icmp eq i32 %indvars.iv.next, %x
|
||||||
|
br i1 %exitcond, label %for.end12.loopexit, label %for.body4
|
||||||
|
|
||||||
|
for.end12.loopexit: ; preds = %cond.end
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
|
@ -83,6 +83,7 @@ TEST(VPInstructionTest, moveAfter) {
|
||||||
|
|
||||||
CHECK_ITERATOR(VPBB1, I2, I1);
|
CHECK_ITERATOR(VPBB1, I2, I1);
|
||||||
CHECK_ITERATOR(VPBB2, I4, I3, I5);
|
CHECK_ITERATOR(VPBB2, I4, I3, I5);
|
||||||
|
EXPECT_EQ(I3->getParent(), I4->getParent());
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
Loading…
Reference in New Issue