[LV] Apply sink-after & interleave-groups as VPlan transformations (NFCI)

This recommits 100e797adb (reverted in
009e032634 for failing an assert). While the
root cause was independently reverted in eaff300401,
this commit includes a LIT test to make sure IVDescriptor's SinkAfter logic does
not try to sink branch instructions.
This commit is contained in:
Gil Rapaport 2019-10-07 17:24:33 +03:00
parent ef459dedca
commit 11ed1c0239
8 changed files with 210 additions and 132 deletions

View File

@ -542,13 +542,10 @@ public:
/// formation for predicated accesses, we may be able to relax this limitation
/// in the future once we handle more complicated blocks.
void reset() {
// Frees the interleave groups tracked here, empties the containers that refer
// to them, and clears the scalar-epilogue flag.
// NOTE(review): this listing appears to merge the removed and added sides of a
// diff hunk. Read literally, groups are deleted twice: once through the
// de-duplicated values of InterleaveGroupMap and once through
// InterleaveGroups. If both containers hold the same pointers that is a double
// free -- confirm which deletion loop is live in the real file.
SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
// Avoid releasing a pointer twice: the map's values are collected into a set
// first (presumably because several entries can refer to the same group).
for (auto &I : InterleaveGroupMap)
DelSet.insert(I.second);
for (auto *Ptr : DelSet)
delete Ptr;
InterleaveGroupMap.clear();
// Delete each element of InterleaveGroups, then drop the dangling pointers.
for (auto *Ptr : InterleaveGroups)
delete Ptr;
InterleaveGroups.clear();
RequiresScalarEpilogue = false;
}

View File

@ -201,6 +201,9 @@ class LoopVectorizationPlanner {
/// The profitability analysis.
LoopVectorizationCostModel &CM;
/// The interleaved access analysis.
InterleavedAccessInfo &IAI;
SmallVector<VPlanPtr, 4> VPlans;
/// This class is used to enable the VPlan to invoke a method of ILV. This is
@ -223,8 +226,10 @@ public:
// NOTE(review): two parameter/initializer tails are visible below -- one that
// ends at CM and one that additionally takes and stores IAI. They look like the
// before and after sides of a change; only one can be live in the real header.
/// Build a planner for loop \p L, storing the analyses and models passed in
/// into the corresponding members.
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
LoopVectorizationLegality *Legal,
LoopVectorizationCostModel &CM)
: OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {}
LoopVectorizationCostModel &CM,
InterleavedAccessInfo &IAI)
: OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
IAI(IAI) {}
/// Plan how to best vectorize, return the best VF and its cost, or None if
/// vectorization and interleaving should be avoided up front.

View File

@ -6710,37 +6710,6 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
return BlockMaskCache[BB] = BlockMask;
}
/// Try to build a VPInterleaveRecipe for \p I.
///
/// \returns nullptr when \p I has no interleave group, or when the cost model
/// does not pick interleaving for the start of \p Range (which may be clamped
/// by the query). Otherwise \p I must be the group's insert position, and a
/// newly allocated recipe is returned, carrying a block-in mask when Legal
/// reports that \p I requires one.
VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I,
                                                           VFRange &Range,
                                                           VPlanPtr &Plan) {
  const InterleaveGroup<Instruction> *IG = CM.getInterleavedAccessGroup(I);
  if (IG == nullptr)
    return nullptr;

  // Per-VF predicate: did the cost model decide to interleave I at this VF?
  auto InterleavesAtVF = [this, I](unsigned VF) -> bool {
    if (VF < 2) // Query is illegal for VF == 1
      return false;
    return CM.getWideningDecision(I, VF) ==
           LoopVectorizationCostModel::CM_Interleave;
  };

  // Check that IG is relevant for the VF's in the given range.
  if (!LoopVectorizationPlanner::getDecisionAndClampRange(InterleavesAtVF,
                                                          Range))
    return nullptr;

  // From here on I is interleaved for every VF left in the range. Only the
  // primary member (the insert position) gets a recipe; callers are expected
  // never to reach this point with an adjunct member.
  assert(I == IG->getInsertPos() &&
         "Generating a recipe for an adjunct member of an interleave group");

  VPValue *Mask = Legal->isMaskRequired(I)
                      ? createBlockInMask(I->getParent(), Plan)
                      : nullptr;
  return new VPInterleaveRecipe(IG, Mask);
}
VPWidenMemoryInstructionRecipe *
VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
VPlanPtr &Plan) {
@ -6757,8 +6726,6 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
CM.getWideningDecision(I, VF);
assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
"CM decision should be taken at this point.");
assert(Decision != LoopVectorizationCostModel::CM_Interleave &&
"Interleave memory opportunity should be caught earlier.");
return Decision != LoopVectorizationCostModel::CM_Scalarize;
};
@ -6923,15 +6890,21 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range))
return false;
// If this ingredient's recipe is to be recorded, keep its recipe a singleton
// to avoid having to split recipes later.
bool IsSingleton = Ingredient2Recipe.count(I);
// Success: widen this instruction. We optimize the common case where
// consecutive instructions can be represented by a single recipe.
if (!VPBB->empty()) {
VPWidenRecipe *LastWidenRecipe = dyn_cast<VPWidenRecipe>(&VPBB->back());
if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I))
return true;
}
if (!IsSingleton && !VPBB->empty() && LastExtensibleRecipe == &VPBB->back() &&
LastExtensibleRecipe->appendInstruction(I))
return true;
VPBB->appendRecipe(new VPWidenRecipe(I));
VPWidenRecipe *WidenRecipe = new VPWidenRecipe(I);
if (!IsSingleton)
LastExtensibleRecipe = WidenRecipe;
setRecipe(I, WidenRecipe);
VPBB->appendRecipe(WidenRecipe);
return true;
}
@ -6947,6 +6920,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
[&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);
auto *Recipe = new VPReplicateRecipe(I, IsUniform, IsPredicated);
setRecipe(I, Recipe);
// Find if I uses a predicated instruction. If so, it will use its scalar
// value. Avoid hoisting the insert-element which packs the scalar value into
@ -7005,36 +6979,20 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr,
bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
VPlanPtr &Plan, VPBasicBlock *VPBB) {
VPRecipeBase *Recipe = nullptr;
// Check if Instr should belong to an interleave memory recipe, or already
// does. In the latter case Instr is irrelevant.
if ((Recipe = tryToInterleaveMemory(Instr, Range, Plan))) {
// First, check for specific widening recipes that deal with memory
// operations, inductions and Phi nodes.
if ((Recipe = tryToWidenMemory(Instr, Range, Plan)) ||
(Recipe = tryToOptimizeInduction(Instr, Range)) ||
(Recipe = tryToBlend(Instr, Plan)) ||
(isa<PHINode>(Instr) &&
(Recipe = new VPWidenPHIRecipe(cast<PHINode>(Instr))))) {
setRecipe(Instr, Recipe);
VPBB->appendRecipe(Recipe);
return true;
}
// Check if Instr is a memory operation that should be widened.
if ((Recipe = tryToWidenMemory(Instr, Range, Plan))) {
VPBB->appendRecipe(Recipe);
return true;
}
// Check if Instr should form some PHI recipe.
if ((Recipe = tryToOptimizeInduction(Instr, Range))) {
VPBB->appendRecipe(Recipe);
return true;
}
if ((Recipe = tryToBlend(Instr, Plan))) {
VPBB->appendRecipe(Recipe);
return true;
}
if (PHINode *Phi = dyn_cast<PHINode>(Instr)) {
VPBB->appendRecipe(new VPWidenPHIRecipe(Phi));
return true;
}
// Check if Instr is to be widened by a general VPWidenRecipe, after
// having first checked for specific widening recipes that deal with
// Interleave Groups, Inductions and Phi nodes.
// Check if Instr is to be widened by a general VPWidenRecipe.
if (tryToWiden(Instr, VPBB, Range))
return true;
@ -7090,19 +7048,57 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
SmallPtrSetImpl<Instruction *> &DeadInstructions) {
// Hold a mapping from predicated instructions to their recipes, in order to
// fix their AlsoPack behavior if a user is determined to replicate and use a
// scalar instead of vector value.
DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe;
DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter();
DenseMap<Instruction *, Instruction *> SinkAfterInverse;
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
// ---------------------------------------------------------------------------
// Pre-construction: record ingredients whose recipes we'll need to further
// process after constructing the initial VPlan.
// ---------------------------------------------------------------------------
// Mark instructions we'll need to sink later and their targets as
// ingredients whose recipe we'll need to record.
for (auto &Entry : SinkAfter) {
RecipeBuilder.recordRecipeOf(Entry.first);
RecipeBuilder.recordRecipeOf(Entry.second);
}
// For each interleave group which is relevant for this (possibly trimmed)
// Range, add it to the set of groups to be later applied to the VPlan and add
// placeholders for its members' Recipes which we'll be replacing with a
// single VPInterleaveRecipe.
for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) {
auto applyIG = [IG, this](unsigned VF) -> bool {
return (VF >= 2 && // Query is illegal for VF == 1
CM.getWideningDecision(IG->getInsertPos(), VF) ==
LoopVectorizationCostModel::CM_Interleave);
};
if (!getDecisionAndClampRange(applyIG, Range))
continue;
InterleaveGroups.insert(IG);
for (unsigned i = 0; i < IG->getFactor(); i++)
if (Instruction *Member = IG->getMember(i))
RecipeBuilder.recordRecipeOf(Member);
};
// ---------------------------------------------------------------------------
// Build initial VPlan: Scan the body of the loop in a topological order to
// visit each basic block after having visited its predecessor basic blocks.
// ---------------------------------------------------------------------------
// Create a dummy pre-entry VPBasicBlock to start building the VPlan.
VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
auto Plan = std::make_unique<VPlan>(VPBB);
VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
// Represent values that will have defs inside VPlan.
for (Value *V : NeedDef)
Plan->addVPValue(V);
@ -7123,8 +7119,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
std::vector<Instruction *> Ingredients;
// Organize the ingredients to vectorize from current basic block in the
// right order.
// Introduce each ingredient into VPlan.
for (Instruction &I : BB->instructionsWithoutDebug()) {
Instruction *Instr = &I;
@ -7134,43 +7129,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
DeadInstructions.find(Instr) != DeadInstructions.end())
continue;
// I is a member of an InterleaveGroup for Range.Start. If it's an adjunct
// member of the IG, do not construct any Recipe for it.
const InterleaveGroup<Instruction> *IG =
CM.getInterleavedAccessGroup(Instr);
if (IG && Instr != IG->getInsertPos() &&
Range.Start >= 2 && // Query is illegal for VF == 1
CM.getWideningDecision(Instr, Range.Start) ==
LoopVectorizationCostModel::CM_Interleave) {
auto SinkCandidate = SinkAfterInverse.find(Instr);
if (SinkCandidate != SinkAfterInverse.end())
Ingredients.push_back(SinkCandidate->second);
continue;
}
// Move instructions to handle first-order recurrences, step 1: avoid
// handling this instruction until after we've handled the instruction it
// should follow.
auto SAIt = SinkAfter.find(Instr);
if (SAIt != SinkAfter.end()) {
LLVM_DEBUG(dbgs() << "Sinking" << *SAIt->first << " after"
<< *SAIt->second
<< " to vectorize a 1st order recurrence.\n");
SinkAfterInverse[SAIt->second] = Instr;
continue;
}
Ingredients.push_back(Instr);
// Move instructions to handle first-order recurrences, step 2: push the
// instruction to be sunk at its insertion point.
auto SAInvIt = SinkAfterInverse.find(Instr);
if (SAInvIt != SinkAfterInverse.end())
Ingredients.push_back(SAInvIt->second);
}
// Introduce each ingredient into VPlan.
for (Instruction *Instr : Ingredients) {
if (RecipeBuilder.tryToCreateRecipe(Instr, Range, Plan, VPBB))
continue;
@ -7195,6 +7153,32 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VPBlockUtils::disconnectBlocks(PreEntry, Entry);
delete PreEntry;
// ---------------------------------------------------------------------------
// Transform initial VPlan: Apply previously taken decisions, in order, to
// bring the VPlan to its final state.
// ---------------------------------------------------------------------------
// Apply Sink-After legal constraints.
for (auto &Entry : SinkAfter) {
VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first);
VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second);
Sink->moveAfter(Target);
}
// Interleave memory: for each Interleave Group we marked earlier as relevant
// for this VPlan, replace the Recipes widening its memory instructions with a
// single VPInterleaveRecipe at its insertion point.
for (auto IG : InterleaveGroups) {
auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
RecipeBuilder.getRecipe(IG->getInsertPos()));
(new VPInterleaveRecipe(IG, Recipe->getMask()))->insertBefore(Recipe);
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (Instruction *Member = IG->getMember(i)) {
RecipeBuilder.getRecipe(Member)->eraseFromParent();
}
}
// Finally, if tail is folded by masking, introduce selects between the phi
// and the live-out instruction of each reduction, at the end of the latch.
if (CM.foldTailByMasking()) {
@ -7427,12 +7411,11 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
}
void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
if (!User)
VPValue *Mask = getMask();
if (!Mask)
return State.ILV->vectorizeMemoryInstruction(&Instr);
// Last (and currently only) operand is a mask.
InnerLoopVectorizer::VectorParts MaskValues(State.UF);
VPValue *Mask = User->getOperand(User->getNumOperands() - 1);
for (unsigned Part = 0; Part < State.UF; ++Part)
MaskValues[Part] = State.get(Mask, Part);
State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues);
@ -7481,7 +7464,7 @@ static bool processLoopInVPlanNativePath(
// Use the planner for outer loop vectorization.
// TODO: CM is not used at this point inside the planner. Turn CM into an
// optional argument if we don't need it in the future.
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM);
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI);
// Get user vectorization factor.
const unsigned UserVF = Hints.getWidth();
@ -7641,7 +7624,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
CM.collectValuesToIgnore();
// Use the planner for vectorization.
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM);
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI);
// Get user vectorization factor.
unsigned UserVF = Hints.getWidth();

View File

@ -47,6 +47,24 @@ class VPRecipeBuilder {
EdgeMaskCacheTy EdgeMaskCache;
BlockMaskCacheTy BlockMaskCache;
// VPlan-VPlan transformations support: Hold a mapping from ingredients to
// their recipe. To save on memory, only do so for selected ingredients,
// marked by having a nullptr entry in this map. If those ingredients get a
// VPWidenRecipe, also avoid compressing other ingredients into it to avoid
// having to split such recipes later.
DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe;
VPWidenRecipe *LastExtensibleRecipe = nullptr;
/// Remember \p R as the recipe built for ingredient \p I. Ingredients that
/// have no entry in Ingredient2Recipe are ignored; an ingredient that has one
/// must not have a recipe stored yet.
void setRecipe(Instruction *I, VPRecipeBase *R) {
  auto Entry = Ingredient2Recipe.find(I);
  if (Entry == Ingredient2Recipe.end())
    return; // Recording was never requested for this ingredient.
  assert(Entry->second == nullptr &&
         "Recipe already set for ingredient");
  Entry->second = R;
}
public:
/// A helper function that computes the predicate of the block BB, assuming
/// that the header block of the loop is set to True. It returns the *entry*
@ -57,16 +75,22 @@ public:
/// and DST.
VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan);
/// Check if \I belongs to an Interleave Group within the given VF \p Range,
/// \return true in the first returned value if so and false otherwise.
/// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG
/// for \p Range.Start, and provide it as the second returned value.
/// Note that if \I is an adjunct member of an IG for \p Range.Start, the
/// \return value is <true, nullptr>, as it is handled by another recipe.
/// \p Range.End may be decreased to ensure same decision from \p Range.Start
/// to \p Range.End.
VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range,
VPlanPtr &Plan);
/// Request that the recipe eventually created for \p I be recorded, by giving
/// it a nullptr placeholder entry in Ingredient2Recipe. Marking the same
/// ingredient again is allowed as long as no recipe has been stored for it.
void recordRecipeOf(Instruction *I) {
  // operator[] creates the (null) placeholder when I has no entry yet.
  VPRecipeBase *&Slot = Ingredient2Recipe[I];
  assert(Slot == nullptr && "Recipe already set for ingredient");
  Slot = nullptr;
}
/// Look up the recipe recorded for ingredient \p I. The ingredient must have
/// been marked for recording and must already have a recipe.
VPRecipeBase *getRecipe(Instruction *I) {
  assert(Ingredient2Recipe.count(I) &&
         "Recording this ingredients recipe was not requested");
  VPRecipeBase *Recorded = Ingredient2Recipe[I];
  assert(Recorded != nullptr && "Ingredient doesn't have a recipe");
  return Recorded;
}
/// Check if \p I is a memory instruction to be widened for \p Range.Start and
/// potentially masked. Such instructions are handled by a recipe that takes

View File

@ -275,18 +275,35 @@ void VPRegionBlock::execute(VPTransformState *State) {
}
/// Link this recipe, which must not belong to any VPBasicBlock yet, into the
/// block that holds \p InsertPos, at InsertPos's position in the recipe list.
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  auto *Block = InsertPos->getParent();
  assert(Block && "Insertion position not in any VPBasicBlock");
  Parent = Block;
  Block->getRecipeList().insert(InsertPos->getIterator(), this);
}
/// Link this recipe, which must not belong to any VPBasicBlock yet, into the
/// block that holds \p InsertPos, right after InsertPos in the recipe list.
void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  auto *Block = InsertPos->getParent();
  assert(Block && "Insertion position not in any VPBasicBlock");
  Parent = Block;
  Block->getRecipeList().insertAfter(InsertPos->getIterator(), this);
}
/// Unlink this recipe from the recipe list of its VPBasicBlock and clear its
/// Parent pointer. The recipe must currently be in a block.
void VPRecipeBase::removeFromParent() {
  auto *Block = getParent();
  assert(Block && "Recipe not in any VPBasicBlock");
  Block->getRecipeList().remove(getIterator());
  Parent = nullptr;
}
/// Erase this recipe from the recipe list of its VPBasicBlock and hand back
/// the iterator produced by the list's erase(). The recipe must currently be
/// in a block.
iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  auto *Block = getParent();
  assert(Block && "Recipe not in any VPBasicBlock");
  auto &Recipes = Block->getRecipeList();
  return Recipes.erase(getIterator());
}
void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
// Relocates this recipe so that it directly follows InsertPos, in the recipe
// list of whichever VPBasicBlock holds InsertPos.
// NOTE(review): two relocation strategies appear back to back here -- a splice
// between the two recipe lists, followed by removeFromParent() plus
// insertAfter(). This looks like the removed and added sides of a diff hunk
// shown together; confirm which one is live in the real file.
InsertPos->getParent()->getRecipeList().splice(
std::next(InsertPos->getIterator()), getParent()->getRecipeList(),
getIterator());
// Unlink from the current block, then re-link after InsertPos; insertAfter()
// sets Parent to InsertPos's block.
removeFromParent();
insertAfter(InsertPos);
}
void VPInstruction::generateInstruction(VPTransformState &State,

View File

@ -567,6 +567,7 @@ public:
/// instructions.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> {
friend VPBasicBlock;
friend class VPBlockUtils;
private:
const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
@ -615,10 +616,18 @@ public:
/// the specified recipe.
void insertBefore(VPRecipeBase *InsertPos);
/// Insert an unlinked Recipe into a basic block immediately after
/// the specified Recipe.
void insertAfter(VPRecipeBase *InsertPos);
/// Unlink this recipe from its current VPBasicBlock and insert it into
/// the VPBasicBlock that MovePos lives in, right after MovePos.
void moveAfter(VPRecipeBase *MovePos);
/// This method unlinks 'this' from the containing basic block, but does not
/// delete it.
void removeFromParent();
/// This method unlinks 'this' from the containing basic block and deletes it.
///
/// \returns an iterator pointing to the element after the erased one
@ -973,6 +982,13 @@ public:
return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC;
}
/// Fetch the mask operand of this recipe. A nullptr result stands for a full
/// mask, i.e. the recipe has no User holding operands.
VPValue *getMask() {
  if (!User)
    return nullptr;
  // Mask is the last operand.
  return User->getOperand(User->getNumOperands() - 1);
}
/// Generate the wide load/store.
void execute(VPTransformState &State) override;

View File

@ -572,3 +572,38 @@ for.body:
for.end:
ret void
}
; Do not sink branches: While branches are if-converted and do not require
; sinking, instructions with side effects (e.g. loads) conditioned by those
; branches will become users of the condition bit after vectorization and would
; need to be sunk if the loop is vectorized.
define void @do_not_sink_branch(i32 %x, i32* %in, i32* %out, i32 %tc) local_unnamed_addr #0 {
; NO-SINK-AFTER-LABEL: do_not_sink_branch
; NO-SINK-AFTER-NOT: vector.ph:
; NO-SINK-AFTER: }
entry:
%cmp530 = icmp slt i32 0, %tc
br label %for.body4
for.body4: ; preds = %cond.end, %entry
%indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %cond.end ]
; %cmp534 is %cmp530 on entry and otherwise the %cmp5 value computed at the end
; of the previous iteration, so the branch below depends on a value carried
; across iterations.
%cmp534 = phi i1 [ %cmp530, %entry ], [ %cmp5, %cond.end ]
br i1 %cmp534, label %cond.true, label %cond.end
cond.true: ; preds = %for.body4
; This load executes only when %cmp534 is true.
%arrayidx7 = getelementptr inbounds i32, i32* %in, i32 %indvars.iv
%in.val = load i32, i32* %arrayidx7, align 4
br label %cond.end
cond.end: ; preds = %for.body4, %cond.true
; Store the loaded value, or 0 when the load was skipped.
%cond = phi i32 [ %in.val, %cond.true ], [ 0, %for.body4 ]
%arrayidx8 = getelementptr inbounds i32, i32* %out, i32 %indvars.iv
store i32 %cond, i32* %arrayidx8, align 4
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
; %cmp5 becomes the next iteration's %cmp534; the loop exit is decided
; separately by comparing the induction variable with %x.
%cmp5 = icmp slt i32 %indvars.iv.next, %tc
%exitcond = icmp eq i32 %indvars.iv.next, %x
br i1 %exitcond, label %for.end12.loopexit, label %for.body4
for.end12.loopexit: ; preds = %cond.end
ret void
}

View File

@ -83,6 +83,7 @@ TEST(VPInstructionTest, moveAfter) {
CHECK_ITERATOR(VPBB1, I2, I1);
CHECK_ITERATOR(VPBB2, I4, I3, I5);
EXPECT_EQ(I3->getParent(), I4->getParent());
}
} // namespace