forked from OSchip/llvm-project
[LV] Record GEP widening decisions in recipe (NFCI)
InnerLoopVectorizer's code called during VPlan execution still relies on original IR's def-use relations to decide which vector code to generate, limiting VPlan transformations ability to modify def-use relations and still have ILV generate the vector code. This commit moves GEP operand queries controlling how GEPs are widened to a dedicated recipe and extracts GEP widening code to its own ILV method taking those recorded decisions as arguments. This reduces ingredient def-use usage by ILV as a step towards full VPlan-based def-use relations. Differential revision: https://reviews.llvm.org/D69067
This commit is contained in:
parent
b31a531f9b
commit
39ccc099c9
|
@ -428,6 +428,11 @@ public:
|
|||
/// new unrolled loop, where UF is the unroll factor.
|
||||
using VectorParts = SmallVector<Value *, 2>;
|
||||
|
||||
/// Vectorize a single GetElementPtrInst based on information gathered and
|
||||
/// decisions taken during planning.
|
||||
void widenGEP(GetElementPtrInst *GEP, unsigned UF, unsigned VF,
|
||||
bool IsPtrLoopInvariant, SmallBitVector &IsIndexLoopInvariant);
|
||||
|
||||
/// Vectorize a single PHINode in a block. This method handles the induction
|
||||
/// variable canonicalization. It supports both VF = 1 for unrolled loops and
|
||||
/// arbitrary length vectors.
|
||||
|
@ -3961,6 +3966,75 @@ void InnerLoopVectorizer::fixNonInductionPHIs() {
|
|||
}
|
||||
}
|
||||
|
||||
void InnerLoopVectorizer::widenGEP(GetElementPtrInst *GEP, unsigned UF,
|
||||
unsigned VF, bool IsPtrLoopInvariant,
|
||||
SmallBitVector &IsIndexLoopInvariant) {
|
||||
// Construct a vector GEP by widening the operands of the scalar GEP as
|
||||
// necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
|
||||
// results in a vector of pointers when at least one operand of the GEP
|
||||
// is vector-typed. Thus, to keep the representation compact, we only use
|
||||
// vector-typed operands for loop-varying values.
|
||||
|
||||
if (VF > 1 && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
|
||||
// If we are vectorizing, but the GEP has only loop-invariant operands,
|
||||
// the GEP we build (by only using vector-typed operands for
|
||||
// loop-varying values) would be a scalar pointer. Thus, to ensure we
|
||||
// produce a vector of pointers, we need to either arbitrarily pick an
|
||||
// operand to broadcast, or broadcast a clone of the original GEP.
|
||||
// Here, we broadcast a clone of the original.
|
||||
//
|
||||
// TODO: If at some point we decide to scalarize instructions having
|
||||
// loop-invariant operands, this special case will no longer be
|
||||
// required. We would add the scalarization decision to
|
||||
// collectLoopScalars() and teach getVectorValue() to broadcast
|
||||
// the lane-zero scalar value.
|
||||
auto *Clone = Builder.Insert(GEP->clone());
|
||||
for (unsigned Part = 0; Part < UF; ++Part) {
|
||||
Value *EntryPart = Builder.CreateVectorSplat(VF, Clone);
|
||||
VectorLoopValueMap.setVectorValue(GEP, Part, EntryPart);
|
||||
addMetadata(EntryPart, GEP);
|
||||
}
|
||||
} else {
|
||||
// If the GEP has at least one loop-varying operand, we are sure to
|
||||
// produce a vector of pointers. But if we are only unrolling, we want
|
||||
// to produce a scalar GEP for each unroll part. Thus, the GEP we
|
||||
// produce with the code below will be scalar (if VF == 1) or vector
|
||||
// (otherwise). Note that for the unroll-only case, we still maintain
|
||||
// values in the vector mapping with initVector, as we do for other
|
||||
// instructions.
|
||||
for (unsigned Part = 0; Part < UF; ++Part) {
|
||||
// The pointer operand of the new GEP. If it's loop-invariant, we
|
||||
// won't broadcast it.
|
||||
auto *Ptr = IsPtrLoopInvariant
|
||||
? GEP->getPointerOperand()
|
||||
: getOrCreateVectorValue(GEP->getPointerOperand(), Part);
|
||||
|
||||
// Collect all the indices for the new GEP. If any index is
|
||||
// loop-invariant, we won't broadcast it.
|
||||
SmallVector<Value *, 4> Indices;
|
||||
for (auto Index : enumerate(GEP->indices())) {
|
||||
Value *User = Index.value().get();
|
||||
if (IsIndexLoopInvariant[Index.index()])
|
||||
Indices.push_back(User);
|
||||
else
|
||||
Indices.push_back(getOrCreateVectorValue(User, Part));
|
||||
}
|
||||
|
||||
// Create the new GEP. Note that this GEP may be a scalar if VF == 1,
|
||||
// but it should be a vector, otherwise.
|
||||
auto *NewGEP =
|
||||
GEP->isInBounds()
|
||||
? Builder.CreateInBoundsGEP(GEP->getSourceElementType(), Ptr,
|
||||
Indices)
|
||||
: Builder.CreateGEP(GEP->getSourceElementType(), Ptr, Indices);
|
||||
assert((VF == 1 || NewGEP->getType()->isVectorTy()) &&
|
||||
"NewGEP is not a pointer vector");
|
||||
VectorLoopValueMap.setVectorValue(GEP, Part, NewGEP);
|
||||
addMetadata(NewGEP, GEP);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
|
||||
unsigned VF) {
|
||||
PHINode *P = cast<PHINode>(PN);
|
||||
|
@ -4063,76 +4137,8 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
|
|||
switch (I.getOpcode()) {
|
||||
case Instruction::Br:
|
||||
case Instruction::PHI:
|
||||
case Instruction::GetElementPtr:
|
||||
llvm_unreachable("This instruction is handled by a different recipe.");
|
||||
case Instruction::GetElementPtr: {
|
||||
// Construct a vector GEP by widening the operands of the scalar GEP as
|
||||
// necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
|
||||
// results in a vector of pointers when at least one operand of the GEP
|
||||
// is vector-typed. Thus, to keep the representation compact, we only use
|
||||
// vector-typed operands for loop-varying values.
|
||||
auto *GEP = cast<GetElementPtrInst>(&I);
|
||||
|
||||
if (VF > 1 && OrigLoop->hasLoopInvariantOperands(GEP)) {
|
||||
// If we are vectorizing, but the GEP has only loop-invariant operands,
|
||||
// the GEP we build (by only using vector-typed operands for
|
||||
// loop-varying values) would be a scalar pointer. Thus, to ensure we
|
||||
// produce a vector of pointers, we need to either arbitrarily pick an
|
||||
// operand to broadcast, or broadcast a clone of the original GEP.
|
||||
// Here, we broadcast a clone of the original.
|
||||
//
|
||||
// TODO: If at some point we decide to scalarize instructions having
|
||||
// loop-invariant operands, this special case will no longer be
|
||||
// required. We would add the scalarization decision to
|
||||
// collectLoopScalars() and teach getVectorValue() to broadcast
|
||||
// the lane-zero scalar value.
|
||||
auto *Clone = Builder.Insert(GEP->clone());
|
||||
for (unsigned Part = 0; Part < UF; ++Part) {
|
||||
Value *EntryPart = Builder.CreateVectorSplat(VF, Clone);
|
||||
VectorLoopValueMap.setVectorValue(&I, Part, EntryPart);
|
||||
addMetadata(EntryPart, GEP);
|
||||
}
|
||||
} else {
|
||||
// If the GEP has at least one loop-varying operand, we are sure to
|
||||
// produce a vector of pointers. But if we are only unrolling, we want
|
||||
// to produce a scalar GEP for each unroll part. Thus, the GEP we
|
||||
// produce with the code below will be scalar (if VF == 1) or vector
|
||||
// (otherwise). Note that for the unroll-only case, we still maintain
|
||||
// values in the vector mapping with initVector, as we do for other
|
||||
// instructions.
|
||||
for (unsigned Part = 0; Part < UF; ++Part) {
|
||||
// The pointer operand of the new GEP. If it's loop-invariant, we
|
||||
// won't broadcast it.
|
||||
auto *Ptr =
|
||||
OrigLoop->isLoopInvariant(GEP->getPointerOperand())
|
||||
? GEP->getPointerOperand()
|
||||
: getOrCreateVectorValue(GEP->getPointerOperand(), Part);
|
||||
|
||||
// Collect all the indices for the new GEP. If any index is
|
||||
// loop-invariant, we won't broadcast it.
|
||||
SmallVector<Value *, 4> Indices;
|
||||
for (auto &U : make_range(GEP->idx_begin(), GEP->idx_end())) {
|
||||
if (OrigLoop->isLoopInvariant(U.get()))
|
||||
Indices.push_back(U.get());
|
||||
else
|
||||
Indices.push_back(getOrCreateVectorValue(U.get(), Part));
|
||||
}
|
||||
|
||||
// Create the new GEP. Note that this GEP may be a scalar if VF == 1,
|
||||
// but it should be a vector, otherwise.
|
||||
auto *NewGEP =
|
||||
GEP->isInBounds()
|
||||
? Builder.CreateInBoundsGEP(GEP->getSourceElementType(), Ptr,
|
||||
Indices)
|
||||
: Builder.CreateGEP(GEP->getSourceElementType(), Ptr, Indices);
|
||||
assert((VF == 1 || NewGEP->getType()->isVectorTy()) &&
|
||||
"NewGEP is not a pointer vector");
|
||||
VectorLoopValueMap.setVectorValue(&I, Part, NewGEP);
|
||||
addMetadata(NewGEP, GEP);
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case Instruction::UDiv:
|
||||
case Instruction::SDiv:
|
||||
case Instruction::SRem:
|
||||
|
@ -6831,7 +6837,6 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
|
|||
case Instruction::FPTrunc:
|
||||
case Instruction::FRem:
|
||||
case Instruction::FSub:
|
||||
case Instruction::GetElementPtr:
|
||||
case Instruction::ICmp:
|
||||
case Instruction::IntToPtr:
|
||||
case Instruction::Load:
|
||||
|
@ -6896,12 +6901,13 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
|
|||
|
||||
if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range))
|
||||
return false;
|
||||
|
||||
// If this ingredient's recipe is to be recorded, keep its recipe a singleton
|
||||
// to avoid having to split recipes later.
|
||||
bool IsSingleton = Ingredient2Recipe.count(I);
|
||||
|
||||
// Success: widen this instruction. We optimize the common case where
|
||||
// Success: widen this instruction.
|
||||
|
||||
// Use the default widening recipe. We optimize the common case where
|
||||
// consecutive instructions can be represented by a single recipe.
|
||||
if (!IsSingleton && !VPBB->empty() && LastExtensibleRecipe == &VPBB->back() &&
|
||||
LastExtensibleRecipe->appendInstruction(I))
|
||||
|
@ -6999,7 +7005,23 @@ bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
|
|||
return true;
|
||||
}
|
||||
|
||||
// Check if Instr is to be widened by a general VPWidenRecipe.
|
||||
// Handle GEP widening.
|
||||
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
|
||||
auto Scalarize = [&](unsigned VF) {
|
||||
return CM.isScalarWithPredication(Instr, VF) ||
|
||||
CM.isScalarAfterVectorization(Instr, VF) ||
|
||||
CM.isProfitableToScalarize(Instr, VF);
|
||||
};
|
||||
if (LoopVectorizationPlanner::getDecisionAndClampRange(Scalarize, Range))
|
||||
return false;
|
||||
VPWidenGEPRecipe *Recipe = new VPWidenGEPRecipe(GEP, OrigLoop);
|
||||
setRecipe(Instr, Recipe);
|
||||
VPBB->appendRecipe(Recipe);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if Instr is to be widened by a general VPWidenRecipe, after
|
||||
// having first checked for specific widening recipes.
|
||||
if (tryToWiden(Instr, VPBB, Range))
|
||||
return true;
|
||||
|
||||
|
@ -7241,7 +7263,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
|
|||
|
||||
SmallPtrSet<Instruction *, 1> DeadInstructions;
|
||||
VPlanHCFGTransforms::VPInstructionsToVPRecipes(
|
||||
Plan, Legal->getInductionVars(), DeadInstructions);
|
||||
OrigLoop, Plan, Legal->getInductionVars(), DeadInstructions);
|
||||
|
||||
return Plan;
|
||||
}
|
||||
|
@ -7271,6 +7293,11 @@ void VPWidenRecipe::execute(VPTransformState &State) {
|
|||
State.ILV->widenInstruction(Instr);
|
||||
}
|
||||
|
||||
void VPWidenGEPRecipe::execute(VPTransformState &State) {
|
||||
State.ILV->widenGEP(GEP, State.UF, State.VF, IsPtrLoopInvariant,
|
||||
IsIndexLoopInvariant);
|
||||
}
|
||||
|
||||
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
|
||||
assert(!State.Instance && "Int or FP induction being replicated.");
|
||||
State.ILV->widenIntOrFpInduction(IV, Trunc);
|
||||
|
|
|
@ -683,6 +683,16 @@ void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O,
|
|||
O << " " << VPlanIngredient(IV) << "\\l\"";
|
||||
}
|
||||
|
||||
void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent) const {
|
||||
O << " +\n" << Indent << "\"WIDEN-GEP ";
|
||||
O << (IsPtrLoopInvariant ? "Inv" : "Var");
|
||||
size_t IndicesNumber = IsIndexLoopInvariant.size();
|
||||
for (size_t I = 0; I < IndicesNumber; ++I)
|
||||
O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
|
||||
O << "\\l\"";
|
||||
O << " +\n" << Indent << "\" " << VPlanIngredient(GEP) << "\\l\"";
|
||||
}
|
||||
|
||||
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent) const {
|
||||
O << " +\n" << Indent << "\"WIDEN-PHI " << VPlanIngredient(Phi) << "\\l\"";
|
||||
}
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include "llvm/ADT/DepthFirstIterator.h"
|
||||
#include "llvm/ADT/GraphTraits.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/ADT/SmallBitVector.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
|
@ -587,6 +588,7 @@ public:
|
|||
VPInterleaveSC,
|
||||
VPPredInstPHISC,
|
||||
VPReplicateSC,
|
||||
VPWidenGEPSC,
|
||||
VPWidenIntOrFpInductionSC,
|
||||
VPWidenMemoryInstructionSC,
|
||||
VPWidenPHISC,
|
||||
|
@ -749,6 +751,36 @@ public:
|
|||
void print(raw_ostream &O, const Twine &Indent) const override;
|
||||
};
|
||||
|
||||
/// A recipe for handling GEP instructions.
|
||||
class VPWidenGEPRecipe : public VPRecipeBase {
|
||||
private:
|
||||
GetElementPtrInst *GEP;
|
||||
bool IsPtrLoopInvariant;
|
||||
SmallBitVector IsIndexLoopInvariant;
|
||||
|
||||
public:
|
||||
VPWidenGEPRecipe(GetElementPtrInst *GEP, Loop *OrigLoop)
|
||||
: VPRecipeBase(VPWidenGEPSC), GEP(GEP),
|
||||
IsIndexLoopInvariant(GEP->getNumIndices(), false) {
|
||||
IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand());
|
||||
for (auto Index : enumerate(GEP->indices()))
|
||||
IsIndexLoopInvariant[Index.index()] =
|
||||
OrigLoop->isLoopInvariant(Index.value().get());
|
||||
}
|
||||
~VPWidenGEPRecipe() override = default;
|
||||
|
||||
/// Method to support type inquiry through isa, cast, and dyn_cast.
|
||||
static inline bool classof(const VPRecipeBase *V) {
|
||||
return V->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC;
|
||||
}
|
||||
|
||||
/// Generate the gep nodes.
|
||||
void execute(VPTransformState &State) override;
|
||||
|
||||
/// Print the recipe.
|
||||
void print(raw_ostream &O, const Twine &Indent) const override;
|
||||
};
|
||||
|
||||
/// A recipe for handling phi nodes of integer and floating-point inductions,
|
||||
/// producing their vector and scalar values.
|
||||
class VPWidenIntOrFpInductionRecipe : public VPRecipeBase {
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
using namespace llvm;
|
||||
|
||||
void VPlanHCFGTransforms::VPInstructionsToVPRecipes(
|
||||
VPlanPtr &Plan,
|
||||
Loop *OrigLoop, VPlanPtr &Plan,
|
||||
LoopVectorizationLegality::InductionList *Inductions,
|
||||
SmallPtrSetImpl<Instruction *> &DeadInstructions) {
|
||||
|
||||
|
@ -64,6 +64,8 @@ void VPlanHCFGTransforms::VPInstructionsToVPRecipes(
|
|||
NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi);
|
||||
} else
|
||||
NewRecipe = new VPWidenPHIRecipe(Phi);
|
||||
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
|
||||
NewRecipe = new VPWidenGEPRecipe(GEP, OrigLoop);
|
||||
} else {
|
||||
// If the last recipe is a VPWidenRecipe, add Inst to it instead of
|
||||
// creating a new recipe.
|
||||
|
|
|
@ -25,7 +25,7 @@ public:
|
|||
/// Replaces the VPInstructions in \p Plan with corresponding
|
||||
/// widen recipes.
|
||||
static void VPInstructionsToVPRecipes(
|
||||
VPlanPtr &Plan,
|
||||
Loop *OrigLoop, VPlanPtr &Plan,
|
||||
LoopVectorizationLegality::InductionList *Inductions,
|
||||
SmallPtrSetImpl<Instruction *> &DeadInstructions);
|
||||
};
|
||||
|
|
|
@ -89,8 +89,8 @@ TEST_F(VPlanHCFGTest, testBuildHCFGInnerLoop) {
|
|||
|
||||
LoopVectorizationLegality::InductionList Inductions;
|
||||
SmallPtrSet<Instruction *, 1> DeadInstructions;
|
||||
VPlanHCFGTransforms::VPInstructionsToVPRecipes(Plan, &Inductions,
|
||||
DeadInstructions);
|
||||
VPlanHCFGTransforms::VPInstructionsToVPRecipes(
|
||||
LI->getLoopFor(LoopHeader), Plan, &Inductions, DeadInstructions);
|
||||
}
|
||||
|
||||
TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
|
||||
|
@ -119,8 +119,8 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
|
|||
|
||||
LoopVectorizationLegality::InductionList Inductions;
|
||||
SmallPtrSet<Instruction *, 1> DeadInstructions;
|
||||
VPlanHCFGTransforms::VPInstructionsToVPRecipes(Plan, &Inductions,
|
||||
DeadInstructions);
|
||||
VPlanHCFGTransforms::VPInstructionsToVPRecipes(
|
||||
LI->getLoopFor(LoopHeader), Plan, &Inductions, DeadInstructions);
|
||||
|
||||
VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
|
||||
EXPECT_NE(nullptr, Entry->getSingleSuccessor());
|
||||
|
@ -136,7 +136,7 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
|
|||
auto *Phi = dyn_cast<VPWidenPHIRecipe>(&*Iter++);
|
||||
EXPECT_NE(nullptr, Phi);
|
||||
|
||||
auto *Idx = dyn_cast<VPWidenRecipe>(&*Iter++);
|
||||
auto *Idx = dyn_cast<VPWidenGEPRecipe>(&*Iter++);
|
||||
EXPECT_NE(nullptr, Idx);
|
||||
|
||||
auto *Load = dyn_cast<VPWidenMemoryInstructionRecipe>(&*Iter++);
|
||||
|
|
Loading…
Reference in New Issue