[VPlan] Add VPWidenPointerInductionRecipe.

This patch moves pointer induction handling from VPWidenPHIRecipe to its
own recipe. In the process, it adds all information required to generate
code for pointer inductions without relying on Legal to access the list
of induction phis.

Alternatively VPWidenPHIRecipe could also take an optional pointer to InductionDescriptor.

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D121615
This commit is contained in:
Florian Hahn 2022-03-24 14:58:23 +00:00
parent 488c772920
commit 46432a0088
No known key found for this signature in database
GPG Key ID: CF59919C6547A668
8 changed files with 195 additions and 143 deletions

View File

@ -315,6 +315,10 @@ public:
/// floating point induction.
const InductionDescriptor *getIntOrFpInductionDescriptor(PHINode *Phi) const;
/// Returns a pointer to the induction descriptor, if \p Phi is pointer
/// induction.
const InductionDescriptor *getPointerInductionDescriptor(PHINode *Phi) const;
/// Returns True if V is a cast that is part of an induction def-use chain,
/// and had been proven to be redundant under a runtime guard (in other
/// words, the cast has the same SCEV expression as the induction phi).

View File

@ -971,6 +971,16 @@ LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const {
return nullptr;
}
const InductionDescriptor *
LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const {
if (!isInductionPhi(Phi))
return nullptr;
auto &ID = getInductionVars().find(Phi)->second;
if (ID.getKind() == InductionDescriptor::IK_PtrInduction)
return &ID;
return nullptr;
}
bool LoopVectorizationLegality::isCastedInductionVariable(
const Value *V) const {
auto *Inst = dyn_cast<Instruction>(V);

View File

@ -490,9 +490,8 @@ public:
/// new unrolled loop, where UF is the unroll factor.
using VectorParts = SmallVector<Value *, 2>;
/// Vectorize a single first-order recurrence or pointer induction PHINode in
/// a block. This method handles the induction variable canonicalization. It
/// supports both VF = 1 for unrolled loops and arbitrary length vectors.
/// Vectorize a single vector PHINode in a block in the VPlan-native path
/// only.
void widenPHIInstruction(Instruction *PN, VPWidenPHIRecipe *PhiR,
VPTransformState &State);
@ -4217,139 +4216,18 @@ bool InnerLoopVectorizer::useOrderedReductions(
void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
VPWidenPHIRecipe *PhiR,
VPTransformState &State) {
PHINode *P = cast<PHINode>(PN);
if (EnableVPlanNativePath) {
// Currently we enter here in the VPlan-native path for non-induction
// PHIs where all control flow is uniform. We simply widen these PHIs.
// Create a vector phi with no operands - the vector phi operands will be
// set at the end of vector code generation.
Type *VecTy = (State.VF.isScalar())
? PN->getType()
: VectorType::get(PN->getType(), State.VF);
Value *VecPhi = Builder.CreatePHI(VecTy, PN->getNumOperands(), "vec.phi");
State.set(PhiR, VecPhi, 0);
OrigPHIsToFix.push_back(P);
return;
}
assert(PN->getParent() == OrigLoop->getHeader() &&
"Non-header phis should have been handled elsewhere");
// In order to support recurrences we need to be able to vectorize Phi nodes.
// Phi nodes have cycles, so we need to vectorize them in two stages. This is
// stage #1: We create a new vector PHI node with no incoming edges. We'll use
// this value when we vectorize all of the instructions that use the PHI.
assert(!Legal->isReductionVariable(P) &&
"reductions should be handled elsewhere");
setDebugLocFromInst(P);
// This PHINode must be an induction variable.
// Make sure that we know about it.
assert(Legal->getInductionVars().count(P) && "Not an induction variable");
InductionDescriptor II = Legal->getInductionVars().lookup(P);
const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
auto *IVR = PhiR->getParent()->getPlan()->getCanonicalIV();
PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0));
// FIXME: The newly created binary instructions should contain nsw/nuw flags,
// which can be found from the original scalar operations.
switch (II.getKind()) {
case InductionDescriptor::IK_NoInduction:
llvm_unreachable("Unknown induction");
case InductionDescriptor::IK_IntInduction:
case InductionDescriptor::IK_FpInduction:
llvm_unreachable("Integer/fp induction is handled elsewhere.");
case InductionDescriptor::IK_PtrInduction: {
// Handle the pointer induction variable case.
assert(P->getType()->isPointerTy() && "Unexpected type.");
if (all_of(PhiR->users(), [PhiR](const VPUser *U) {
return cast<VPRecipeBase>(U)->usesScalars(PhiR);
})) {
// This is the normalized GEP that starts counting at zero.
Value *PtrInd =
Builder.CreateSExtOrTrunc(CanonicalIV, II.getStep()->getType());
// Determine the number of scalars we need to generate for each unroll
// iteration. If the instruction is uniform, we only need to generate the
// first lane. Otherwise, we generate all VF values.
bool IsUniform = vputils::onlyFirstLaneUsed(PhiR);
assert((IsUniform || !State.VF.isScalable()) &&
"Cannot scalarize a scalable VF");
unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();
for (unsigned Part = 0; Part < UF; ++Part) {
Value *PartStart =
createStepForVF(Builder, PtrInd->getType(), VF, Part);
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
Value *Idx = Builder.CreateAdd(
PartStart, ConstantInt::get(PtrInd->getType(), Lane));
Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
Value *Step = CreateStepValue(II.getStep(), *PSE.getSE(),
State.CFG.PrevBB->getTerminator());
Value *SclrGep = emitTransformedIndex(Builder, GlobalIdx,
II.getStartValue(), Step, II);
SclrGep->setName("next.gep");
State.set(PhiR, SclrGep, VPIteration(Part, Lane));
}
}
return;
}
assert(isa<SCEVConstant>(II.getStep()) &&
"Induction step not a SCEV constant!");
Type *PhiType = II.getStep()->getType();
// Build a pointer phi
Value *ScalarStartValue = PhiR->getStartValue()->getLiveInIRValue();
Type *ScStValueType = ScalarStartValue->getType();
PHINode *NewPointerPhi =
PHINode::Create(ScStValueType, 2, "pointer.phi", CanonicalIV);
NewPointerPhi->addIncoming(ScalarStartValue, LoopVectorPreHeader);
// A pointer induction, performed by using a gep
BasicBlock *LoopLatch = LI->getLoopFor(State.CFG.PrevBB)->getLoopLatch();
Instruction *InductionLoc = LoopLatch->getTerminator();
const SCEV *ScalarStep = II.getStep();
SCEVExpander Exp(*PSE.getSE(), DL, "induction");
Value *ScalarStepValue =
Exp.expandCodeFor(ScalarStep, PhiType, InductionLoc);
Value *RuntimeVF = getRuntimeVF(Builder, PhiType, VF);
Value *NumUnrolledElems =
Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, State.UF));
Value *InductionGEP = GetElementPtrInst::Create(
II.getElementType(), NewPointerPhi,
Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",
InductionLoc);
NewPointerPhi->addIncoming(InductionGEP, LoopLatch);
// Create UF many actual address geps that use the pointer
// phi as base and a vectorized version of the step value
// (<step*0, ..., step*N>) as offset.
for (unsigned Part = 0; Part < State.UF; ++Part) {
Type *VecPhiType = VectorType::get(PhiType, State.VF);
Value *StartOffsetScalar =
Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, Part));
Value *StartOffset =
Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
// Create a vector of consecutive numbers from zero to VF.
StartOffset =
Builder.CreateAdd(StartOffset, Builder.CreateStepVector(VecPhiType));
Value *GEP = Builder.CreateGEP(
II.getElementType(), NewPointerPhi,
Builder.CreateMul(
StartOffset, Builder.CreateVectorSplat(State.VF, ScalarStepValue),
"vector.gep"));
State.set(PhiR, GEP, Part);
}
}
}
assert(EnableVPlanNativePath &&
"Non-native vplans are not expected to have VPWidenPHIRecipes.");
// Currently we enter here in the VPlan-native path for non-induction
// PHIs where all control flow is uniform. We simply widen these PHIs.
// Create a vector phi with no operands - the vector phi operands will be
// set at the end of vector code generation.
Type *VecTy = (State.VF.isScalar())
? PN->getType()
: VectorType::get(PN->getType(), State.VF);
Value *VecPhi = Builder.CreatePHI(VecTy, PN->getNumOperands(), "vec.phi");
State.set(PhiR, VecPhi, 0);
OrigPHIsToFix.push_back(cast<PHINode>(PN));
}
/// A helper function for checking whether an integer division-related
@ -8336,7 +8214,7 @@ createWidenInductionRecipe(PHINode *Phi, Instruction *PhiOrTrunc,
!NeedsScalarIVOnly, SE);
}
VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI(
PHINode *Phi, ArrayRef<VPValue *> Operands, VFRange &Range) const {
// Check if this is an integer or fp induction. If so, build the recipe that
@ -8345,6 +8223,10 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
return createWidenInductionRecipe(Phi, Phi, Operands[0], *II, CM,
*PSE.getSE(), *OrigLoop, Range);
// Check if this is pointer induction. If so, build the recipe for it.
if (auto *II = Legal->getPointerInductionDescriptor(Phi))
return new VPWidenPointerInductionRecipe(Phi, Operands[0], *II,
*PSE.getSE());
return nullptr;
}
@ -9660,6 +9542,102 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
VecInd->addIncoming(LastInduction, LoopVectorLatch);
}
void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
assert(IndDesc.getKind() == InductionDescriptor::IK_PtrInduction &&
"Not a pointer induction according to InductionDescriptor!");
PHINode *Phi = cast<PHINode>(getUnderlyingInstr());
assert(Phi->getType()->isPointerTy() && "Unexpected type.");
auto *IVR = getParent()->getPlan()->getCanonicalIV();
PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0));
if (all_of(users(), [this](const VPUser *U) {
return cast<VPRecipeBase>(U)->usesScalars(this);
})) {
// This is the normalized GEP that starts counting at zero.
Value *PtrInd = State.Builder.CreateSExtOrTrunc(
CanonicalIV, IndDesc.getStep()->getType());
// Determine the number of scalars we need to generate for each unroll
// iteration. If the instruction is uniform, we only need to generate the
// first lane. Otherwise, we generate all VF values.
bool IsUniform = vputils::onlyFirstLaneUsed(this);
assert((IsUniform || !State.VF.isScalable()) &&
"Cannot scalarize a scalable VF");
unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *PartStart =
createStepForVF(State.Builder, PtrInd->getType(), State.VF, Part);
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
Value *Idx = State.Builder.CreateAdd(
PartStart, ConstantInt::get(PtrInd->getType(), Lane));
Value *GlobalIdx = State.Builder.CreateAdd(PtrInd, Idx);
Value *Step = CreateStepValue(IndDesc.getStep(), SE,
State.CFG.PrevBB->getTerminator());
Value *SclrGep = emitTransformedIndex(
State.Builder, GlobalIdx, IndDesc.getStartValue(), Step, IndDesc);
SclrGep->setName("next.gep");
State.set(this, SclrGep, VPIteration(Part, Lane));
}
}
return;
}
assert(isa<SCEVConstant>(IndDesc.getStep()) &&
"Induction step not a SCEV constant!");
Type *PhiType = IndDesc.getStep()->getType();
// Build a pointer phi
Value *ScalarStartValue = getStartValue()->getLiveInIRValue();
Type *ScStValueType = ScalarStartValue->getType();
PHINode *NewPointerPhi =
PHINode::Create(ScStValueType, 2, "pointer.phi", CanonicalIV);
NewPointerPhi->addIncoming(ScalarStartValue, State.CFG.VectorPreHeader);
// A pointer induction, performed by using a gep
BasicBlock *LoopLatch =
State.LI->getLoopFor(State.CFG.PrevBB)->getLoopLatch();
const DataLayout &DL = LoopLatch->getModule()->getDataLayout();
Instruction *InductionLoc = LoopLatch->getTerminator();
const SCEV *ScalarStep = IndDesc.getStep();
SCEVExpander Exp(SE, DL, "induction");
Value *ScalarStepValue = Exp.expandCodeFor(ScalarStep, PhiType, InductionLoc);
Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
Value *NumUnrolledElems =
State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, State.UF));
Value *InductionGEP = GetElementPtrInst::Create(
IndDesc.getElementType(), NewPointerPhi,
State.Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",
InductionLoc);
NewPointerPhi->addIncoming(InductionGEP, LoopLatch);
// Create UF many actual address geps that use the pointer
// phi as base and a vectorized version of the step value
// (<step*0, ..., step*N>) as offset.
for (unsigned Part = 0; Part < State.UF; ++Part) {
Type *VecPhiType = VectorType::get(PhiType, State.VF);
Value *StartOffsetScalar =
State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, Part));
Value *StartOffset =
State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
// Create a vector of consecutive numbers from zero to VF.
StartOffset = State.Builder.CreateAdd(
StartOffset, State.Builder.CreateStepVector(VecPhiType));
Value *GEP = State.Builder.CreateGEP(
IndDesc.getElementType(), NewPointerPhi,
State.Builder.CreateMul(
StartOffset,
State.Builder.CreateVectorSplat(State.VF, ScalarStepValue),
"vector.gep"));
State.set(this, GEP, Part);
}
}
void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "VPScalarIVStepsRecipe being replicated.");

View File

@ -74,9 +74,9 @@ class VPRecipeBuilder {
/// Check if an induction recipe should be constructed for \I. If so build and
/// return it. If not, return null.
VPWidenIntOrFpInductionRecipe *
tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef<VPValue *> Operands,
VFRange &Range) const;
VPRecipeBase *tryToOptimizeInductionPHI(PHINode *Phi,
ArrayRef<VPValue *> Operands,
VFRange &Range) const;
/// Optimize the special case where the operand of \p I is a constant integer
/// induction variable.

View File

@ -575,6 +575,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPBranchOnMaskSC:
return false;
case VPWidenIntOrFpInductionSC:
case VPWidenPointerInductionSC:
case VPWidenCanonicalIVSC:
case VPWidenPHISC:
case VPBlendSC:
@ -981,7 +982,8 @@ void VPlan::execute(VPTransformState *State) {
for (VPRecipeBase &R : Header->phis()) {
// Skip phi-like recipes that generate their backedege values themselves.
// TODO: Model their backedge values explicitly.
if (isa<VPWidenIntOrFpInductionRecipe>(&R) || isa<VPWidenPHIRecipe>(&R))
if (isa<VPWidenIntOrFpInductionRecipe>(&R) || isa<VPWidenPHIRecipe>(&R) ||
isa<VPWidenPointerInductionRecipe>(&R))
continue;
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
@ -1272,6 +1274,16 @@ void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
} else
O << " " << VPlanIngredient(IV);
}
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "EMIT ";
printAsOperand(O, SlotTracker);
O << " = WIDEN-POINTER-INDUCTION ";
getStartValue()->printAsOperand(O, SlotTracker);
O << ", " << *IndDesc.getStep();
}
#endif
bool VPWidenIntOrFpInductionRecipe::isCanonical() const {

View File

@ -1185,6 +1185,9 @@ public:
VPValue *getStartValue() {
return getNumOperands() == 0 ? nullptr : getOperand(0);
}
VPValue *getStartValue() const {
return getNumOperands() == 0 ? nullptr : getOperand(0);
}
/// Returns the incoming value from the loop backedge.
VPValue *getBackedgeValue() {
@ -1198,6 +1201,49 @@ public:
}
};
class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
const InductionDescriptor &IndDesc;
/// SCEV used to expand step.
/// FIXME: move expansion of step to the pre-header, once it is modeled
/// explicitly.
ScalarEvolution &SE;
public:
/// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
/// Start.
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start,
const InductionDescriptor &IndDesc,
ScalarEvolution &SE)
: VPHeaderPHIRecipe(VPVWidenPointerInductionSC, VPWidenPointerInductionSC,
Phi),
IndDesc(IndDesc), SE(SE) {
addOperand(Start);
}
~VPWidenPointerInductionRecipe() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPRecipeBase *B) {
return B->getVPDefID() == VPRecipeBase::VPWidenPointerInductionSC;
}
static inline bool classof(const VPHeaderPHIRecipe *R) {
return R->getVPDefID() == VPRecipeBase::VPWidenPointerInductionSC;
}
static inline bool classof(const VPValue *V) {
return V->getVPValueID() == VPValue::VPVWidenPointerInductionSC;
}
/// Generate vector values for the pointer induction.
void execute(VPTransformState &State) override;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
};
/// A recipe for handling header phis that are widened in the vector loop.
/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
/// managed in the recipe directly.

View File

@ -106,6 +106,7 @@ public:
VPVFirstOrderRecurrencePHISC,
VPVWidenPHISC,
VPVWidenIntOrFpInductionSC,
VPVWidenPointerInductionSC,
VPVPredInstPHI,
VPVReductionPHISC,
};
@ -346,6 +347,7 @@ public:
VPFirstOrderRecurrencePHISC,
VPWidenPHISC,
VPWidenIntOrFpInductionSC,
VPWidenPointerInductionSC,
VPPredInstPHISC,
VPReductionPHISC,
VPFirstPHISC = VPBlendSC,

View File

@ -13,8 +13,8 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: loop.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: WIDEN-PHI %ptr.iv.1 = phi %start.1, %ptr.iv.1.next
; CHECK-NEXT: WIDEN-PHI %ptr.iv.2 = phi %start.2, %ptr.iv.2.next
; CHECK-NEXT: EMIT ir<%ptr.iv.1> = WIDEN-POINTER-INDUCTION ir<%start.1>, 1
; CHECK-NEXT: EMIT ir<%ptr.iv.2> = WIDEN-POINTER-INDUCTION ir<%start.2>, 1
; CHECK-NEXT: WIDEN-GEP Var[Inv] ir<%ptr.iv.2.next> = getelementptr ir<%ptr.iv.2>, ir<1>
; CHECK-NEXT: WIDEN store ir<%ptr.iv.1>, ir<%ptr.iv.2.next>
; CHECK-NEXT: WIDEN ir<%lv> = load ir<%ptr.iv.2>