forked from OSchip/llvm-project
[Loop Vectorizer] Handling loops with FP induction variables.
Allowed loop vectorization with secondary FP IVs, like this: float *A; float x = init; for (int i=0; i < N; ++i) { A[i] = x; x -= fp_inc; } The auto-vectorization is possible when the induction binary operator is "fast" or the function has the "unsafe" attribute. Differential Revision: https://reviews.llvm.org/D21330 llvm-svn: 276554
This commit is contained in:
parent
93ea19b9a6
commit
376a18bd92
|
@ -263,13 +263,15 @@ public:
|
|||
enum InductionKind {
|
||||
IK_NoInduction, ///< Not an induction variable.
|
||||
IK_IntInduction, ///< Integer induction variable. Step = C.
|
||||
IK_PtrInduction ///< Pointer induction var. Step = C / sizeof(elem).
|
||||
IK_PtrInduction, ///< Pointer induction var. Step = C / sizeof(elem).
|
||||
IK_FpInduction ///< Floating point induction variable.
|
||||
};
|
||||
|
||||
public:
|
||||
/// Default constructor - creates an invalid induction.
|
||||
InductionDescriptor()
|
||||
: StartValue(nullptr), IK(IK_NoInduction), Step(nullptr) {}
|
||||
: StartValue(nullptr), IK(IK_NoInduction), Step(nullptr),
|
||||
InductionBinOp(nullptr) {}
|
||||
|
||||
/// Get the consecutive direction. Returns:
|
||||
/// 0 - unknown or non-consecutive.
|
||||
|
@ -291,26 +293,58 @@ public:
|
|||
const SCEV *getStep() const { return Step; }
|
||||
ConstantInt *getConstIntStepValue() const;
|
||||
|
||||
/// Returns true if \p Phi is an induction. If \p Phi is an induction,
|
||||
/// the induction descriptor \p D will contain the data describing this
|
||||
/// induction. If by some other means the caller has a better SCEV
|
||||
/// Returns true if \p Phi is an induction in the loop \p L. If \p Phi is an
|
||||
/// induction, the induction descriptor \p D will contain the data describing
|
||||
/// this induction. If by some other means the caller has a better SCEV
|
||||
/// expression for \p Phi than the one returned by the ScalarEvolution
|
||||
/// analysis, it can be passed through \p Expr.
|
||||
static bool isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
|
||||
static bool isInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE,
|
||||
InductionDescriptor &D,
|
||||
const SCEV *Expr = nullptr);
|
||||
|
||||
/// Returns true if \p Phi is an induction, in the context associated with
|
||||
/// the run-time predicate of PSE. If \p Assume is true, this can add further
|
||||
/// SCEV predicates to \p PSE in order to prove that \p Phi is an induction.
|
||||
/// Returns true if \p Phi is a floating point induction in the loop \p L.
|
||||
/// If \p Phi is an induction, the induction descriptor \p D will contain
|
||||
/// the data describing this induction.
|
||||
static bool isFPInductionPHI(PHINode *Phi, const Loop* L,
|
||||
ScalarEvolution *SE, InductionDescriptor &D);
|
||||
|
||||
/// Returns true if \p Phi is a loop \p L induction, in the context associated
|
||||
/// with the run-time predicate of PSE. If \p Assume is true, this can add
|
||||
/// further SCEV predicates to \p PSE in order to prove that \p Phi is an
|
||||
/// induction.
|
||||
/// If \p Phi is an induction, \p D will contain the data describing this
|
||||
/// induction.
|
||||
static bool isInductionPHI(PHINode *Phi, PredicatedScalarEvolution &PSE,
|
||||
static bool isInductionPHI(PHINode *Phi, const Loop* L,
|
||||
PredicatedScalarEvolution &PSE,
|
||||
InductionDescriptor &D, bool Assume = false);
|
||||
|
||||
/// Returns true if the induction type is FP and the binary operator does
|
||||
/// not have the "fast-math" property. Such operation requires a relaxed FP
|
||||
/// mode.
|
||||
bool hasUnsafeAlgebra() {
|
||||
return InductionBinOp &&
|
||||
!cast<FPMathOperator>(InductionBinOp)->hasUnsafeAlgebra();
|
||||
}
|
||||
|
||||
/// Returns induction operator that does not have "fast-math" property
|
||||
/// and requires FP unsafe mode.
|
||||
Instruction *getUnsafeAlgebraInst() {
|
||||
if (!InductionBinOp ||
|
||||
cast<FPMathOperator>(InductionBinOp)->hasUnsafeAlgebra())
|
||||
return nullptr;
|
||||
return InductionBinOp;
|
||||
}
|
||||
|
||||
/// Returns binary opcode of the induction operator.
|
||||
Instruction::BinaryOps getInductionOpcode() const {
|
||||
return InductionBinOp ? InductionBinOp->getOpcode() :
|
||||
Instruction::BinaryOpsEnd;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Private constructor - used by \c isInductionPHI.
|
||||
InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step);
|
||||
InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step,
|
||||
BinaryOperator *InductionBinOp = nullptr);
|
||||
|
||||
/// Start value.
|
||||
TrackingVH<Value> StartValue;
|
||||
|
@ -318,6 +352,8 @@ private:
|
|||
InductionKind IK;
|
||||
/// Step value.
|
||||
const SCEV *Step;
|
||||
// Instruction that advances induction variable.
|
||||
BinaryOperator *InductionBinOp;
|
||||
};
|
||||
|
||||
BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
|
||||
|
|
|
@ -703,7 +703,7 @@ bool LoopInterchangeLegality::findInductionAndReductions(
|
|||
RecurrenceDescriptor RD;
|
||||
InductionDescriptor ID;
|
||||
PHINode *PHI = cast<PHINode>(I);
|
||||
if (InductionDescriptor::isInductionPHI(PHI, SE, ID))
|
||||
if (InductionDescriptor::isInductionPHI(PHI, L, SE, ID))
|
||||
Inductions.push_back(PHI);
|
||||
else if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
|
||||
Reductions.push_back(PHI);
|
||||
|
|
|
@ -654,8 +654,8 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
|
|||
}
|
||||
|
||||
InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
|
||||
const SCEV *Step)
|
||||
: StartValue(Start), IK(K), Step(Step) {
|
||||
const SCEV *Step, BinaryOperator *BOp)
|
||||
: StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
|
||||
assert(IK != IK_NoInduction && "Not an induction");
|
||||
|
||||
// Start value type should match the induction kind and the value
|
||||
|
@ -672,7 +672,15 @@ InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
|
|||
|
||||
assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
|
||||
"Step value should be constant for pointer induction");
|
||||
assert(Step->getType()->isIntegerTy() && "StepValue is not an integer");
|
||||
assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
|
||||
"StepValue is not an integer");
|
||||
|
||||
assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
|
||||
"StepValue is not FP for FpInduction");
|
||||
assert((IK != IK_FpInduction || (InductionBinOp &&
|
||||
(InductionBinOp->getOpcode() == Instruction::FAdd ||
|
||||
InductionBinOp->getOpcode() == Instruction::FSub))) &&
|
||||
"Binary opcode should be specified for FP induction");
|
||||
}
|
||||
|
||||
int InductionDescriptor::getConsecutiveDirection() const {
|
||||
|
@ -693,6 +701,8 @@ Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
|
|||
const DataLayout& DL) const {
|
||||
|
||||
SCEVExpander Exp(*SE, DL, "induction");
|
||||
assert(Index->getType() == Step->getType() &&
|
||||
"Index type does not match StepValue type");
|
||||
switch (IK) {
|
||||
case IK_IntInduction: {
|
||||
assert(Index->getType() == StartValue->getType() &&
|
||||
|
@ -717,29 +727,113 @@ Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
|
|||
return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
|
||||
}
|
||||
case IK_PtrInduction: {
|
||||
assert(Index->getType() == Step->getType() &&
|
||||
"Index type does not match StepValue type");
|
||||
assert(isa<SCEVConstant>(Step) &&
|
||||
"Expected constant step for pointer induction");
|
||||
const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
|
||||
Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
|
||||
return B.CreateGEP(nullptr, StartValue, Index);
|
||||
}
|
||||
case IK_FpInduction: {
|
||||
assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
|
||||
assert(InductionBinOp &&
|
||||
(InductionBinOp->getOpcode() == Instruction::FAdd ||
|
||||
InductionBinOp->getOpcode() == Instruction::FSub) &&
|
||||
"Original bin op should be defined for FP induction");
|
||||
|
||||
Value *StepValue = cast<SCEVUnknown>(Step)->getValue();
|
||||
|
||||
// Floating point operations had to be 'fast' to enable the induction.
|
||||
FastMathFlags Flags;
|
||||
Flags.setUnsafeAlgebra();
|
||||
|
||||
Value *MulExp = B.CreateFMul(StepValue, Index);
|
||||
if (isa<Instruction>(MulExp))
|
||||
// We have to check, the MulExp may be a constant.
|
||||
cast<Instruction>(MulExp)->setFastMathFlags(Flags);
|
||||
|
||||
Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue,
|
||||
MulExp, "induction");
|
||||
if (isa<Instruction>(BOp))
|
||||
cast<Instruction>(BOp)->setFastMathFlags(Flags);
|
||||
|
||||
return BOp;
|
||||
}
|
||||
case IK_NoInduction:
|
||||
return nullptr;
|
||||
}
|
||||
llvm_unreachable("invalid enum");
|
||||
}
|
||||
|
||||
bool InductionDescriptor::isInductionPHI(PHINode *Phi,
|
||||
/// Recognizes a floating-point induction PHI in \p TheLoop.
///
/// \p Phi must have a floating-point type (asserted). It is accepted when
///   - it lives in the header of \p TheLoop and has exactly two incoming
///     values (one from inside the loop, one from outside),
///   - the value coming from inside the loop is a binary operator that is
///     either an FAdd with \p Phi as one of its operands, or an FSub with
///     \p Phi as its first operand, and
///   - the other operand of that operator is not an instruction contained in
///     \p TheLoop.
///
/// On success \p D is overwritten with an IK_FpInduction descriptor holding
/// the start value, the step wrapped as an unknown SCEV, and the update
/// operator, and true is returned. On failure \p D is left untouched and
/// false is returned.
bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                           ScalarEvolution *SE,
                                           InductionDescriptor &D) {
  // Integer and pointer PHIs are classified elsewhere.
  assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");

  // The loop may have several entrances or exits; the PHI is analyzable only
  // if it sits in the header and merges one entry value with one backedge
  // value.
  if (TheLoop->getHeader() != Phi->getParent() ||
      Phi->getNumIncomingValues() != 2)
    return false;

  // Find out which of the two incoming edges originates inside the loop.
  unsigned BackedgeIdx = 0;
  if (!TheLoop->contains(Phi->getIncomingBlock(0))) {
    assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
           "Unexpected Phi node in the loop");
    BackedgeIdx = 1;
  }
  Value *Latched = Phi->getIncomingValue(BackedgeIdx);
  Value *Initial = Phi->getIncomingValue(1 - BackedgeIdx);

  auto *Update = dyn_cast<BinaryOperator>(Latched);
  if (!Update)
    return false;

  Value *LHS = Update->getOperand(0);
  Value *RHS = Update->getOperand(1);
  Value *Increment = nullptr;
  switch (Update->getOpcode()) {
  case Instruction::FAdd:
    // The PHI may appear on either side of the addition.
    if (LHS == Phi)
      Increment = RHS;
    else if (RHS == Phi)
      Increment = LHS;
    break;
  case Instruction::FSub:
    // Only "Phi - X" is accepted; "X - Phi" is not.
    if (LHS == Phi)
      Increment = RHS;
    break;
  default:
    break;
  }
  if (!Increment)
    return false;

  // The increment must be loop invariant: reject instructions defined inside
  // the loop.
  auto *IncrementInst = dyn_cast<Instruction>(Increment);
  if (IncrementInst && TheLoop->contains(IncrementInst))
    return false;

  // An FP step is represented as an unknown SCEV wrapping the increment value.
  D = InductionDescriptor(Initial, IK_FpInduction, SE->getUnknown(Increment),
                          Update);
  return true;
}
|
||||
|
||||
bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
|
||||
PredicatedScalarEvolution &PSE,
|
||||
InductionDescriptor &D,
|
||||
bool Assume) {
|
||||
Type *PhiTy = Phi->getType();
|
||||
// We only handle integer and pointer inductions variables.
|
||||
if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
|
||||
|
||||
// Handle integer and pointer induction variables.
|
||||
// We now also handle FP inductions, but without trying to make a
|
||||
// recurrent expression from the PHI node in-place.
|
||||
|
||||
if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() &&
|
||||
!PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
|
||||
return false;
|
||||
|
||||
if (PhiTy->isFloatingPointTy())
|
||||
return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
|
||||
|
||||
const SCEV *PhiScev = PSE.getSCEV(Phi);
|
||||
const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
|
||||
|
||||
|
@ -752,10 +846,10 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi,
|
|||
return false;
|
||||
}
|
||||
|
||||
return isInductionPHI(Phi, PSE.getSE(), D, AR);
|
||||
return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
|
||||
}
|
||||
|
||||
bool InductionDescriptor::isInductionPHI(PHINode *Phi,
|
||||
bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
|
||||
ScalarEvolution *SE,
|
||||
InductionDescriptor &D,
|
||||
const SCEV *Expr) {
|
||||
|
@ -773,7 +867,7 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi,
|
|||
return false;
|
||||
}
|
||||
|
||||
assert(AR->getLoop()->getHeader() == Phi->getParent() &&
|
||||
assert(TheLoop->getHeader() == Phi->getParent() &&
|
||||
"PHI is an AddRec for a different loop?!");
|
||||
Value *StartValue =
|
||||
Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
|
||||
|
@ -781,7 +875,7 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi,
|
|||
// Calculate the pointer stride and check if it is consecutive.
|
||||
// The stride may be a constant or a loop invariant integer value.
|
||||
const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
|
||||
if (!ConstStep && !SE->isLoopInvariant(Step, AR->getLoop()))
|
||||
if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
|
||||
return false;
|
||||
|
||||
if (PhiTy->isIntegerTy()) {
|
||||
|
|
|
@ -402,7 +402,10 @@ protected:
|
|||
|
||||
/// This function adds (StartIdx, StartIdx + Step, StartIdx + 2*Step, ...)
|
||||
/// to each vector element of Val. The sequence starts at StartIndex.
|
||||
virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step);
|
||||
/// \p Opcode is relevant for FP induction variable.
|
||||
virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step,
|
||||
Instruction::BinaryOps Opcode =
|
||||
Instruction::BinaryOpsEnd);
|
||||
|
||||
/// Compute scalar induction steps. \p ScalarIV is the scalar induction
|
||||
/// variable on which to base the steps, \p Step is the size of the step, and
|
||||
|
@ -625,7 +628,9 @@ private:
|
|||
bool IfPredicateStore = false) override;
|
||||
void vectorizeMemoryInstruction(Instruction *Instr) override;
|
||||
Value *getBroadcastInstrs(Value *V) override;
|
||||
Value *getStepVector(Value *Val, int StartIdx, Value *Step) override;
|
||||
Value *getStepVector(Value *Val, int StartIdx, Value *Step,
|
||||
Instruction::BinaryOps Opcode =
|
||||
Instruction::BinaryOpsEnd) override;
|
||||
Value *reverseVector(Value *Vec) override;
|
||||
};
|
||||
|
||||
|
@ -2000,32 +2005,60 @@ void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts &Entry,
|
|||
}
|
||||
}
|
||||
|
||||
Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
|
||||
Value *Step) {
|
||||
Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step,
|
||||
Instruction::BinaryOps BinOp) {
|
||||
// Create and check the types.
|
||||
assert(Val->getType()->isVectorTy() && "Must be a vector");
|
||||
assert(Val->getType()->getScalarType()->isIntegerTy() &&
|
||||
"Elem must be an integer");
|
||||
assert(Step->getType() == Val->getType()->getScalarType() &&
|
||||
"Step has wrong type");
|
||||
// Create the types.
|
||||
Type *ITy = Val->getType()->getScalarType();
|
||||
VectorType *Ty = cast<VectorType>(Val->getType());
|
||||
int VLen = Ty->getNumElements();
|
||||
int VLen = Val->getType()->getVectorNumElements();
|
||||
|
||||
Type *STy = Val->getType()->getScalarType();
|
||||
assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
|
||||
"Induction Step must be an integer or FP");
|
||||
assert(Step->getType() == STy && "Step has wrong type");
|
||||
|
||||
SmallVector<Constant *, 8> Indices;
|
||||
|
||||
if (STy->isIntegerTy()) {
|
||||
// Create a vector of consecutive numbers from StartIdx to StartIdx + VLen - 1.
|
||||
for (int i = 0; i < VLen; ++i)
|
||||
Indices.push_back(ConstantInt::get(STy, StartIdx + i));
|
||||
|
||||
// Add the consecutive indices to the vector value.
|
||||
Constant *Cv = ConstantVector::get(Indices);
|
||||
assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
|
||||
Step = Builder.CreateVectorSplat(VLen, Step);
|
||||
assert(Step->getType() == Val->getType() && "Invalid step vec");
|
||||
// FIXME: The newly created binary instructions should contain nsw/nuw flags,
|
||||
// which can be found from the original scalar operations.
|
||||
Step = Builder.CreateMul(Cv, Step);
|
||||
return Builder.CreateAdd(Val, Step, "induction");
|
||||
}
|
||||
|
||||
// Floating point induction.
|
||||
assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
|
||||
"Binary Opcode should be specified for FP induction");
|
||||
// Create a vector of consecutive numbers from StartIdx to StartIdx + VLen - 1.
|
||||
for (int i = 0; i < VLen; ++i)
|
||||
Indices.push_back(ConstantInt::get(ITy, StartIdx + i));
|
||||
Indices.push_back(ConstantFP::get(STy, (double)(StartIdx + i)));
|
||||
|
||||
// Add the consecutive indices to the vector value.
|
||||
Constant *Cv = ConstantVector::get(Indices);
|
||||
assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
|
||||
|
||||
Step = Builder.CreateVectorSplat(VLen, Step);
|
||||
assert(Step->getType() == Val->getType() && "Invalid step vec");
|
||||
// FIXME: The newly created binary instructions should contain nsw/nuw flags,
|
||||
// which can be found from the original scalar operations.
|
||||
Step = Builder.CreateMul(Cv, Step);
|
||||
return Builder.CreateAdd(Val, Step, "induction");
|
||||
|
||||
// Floating point operations had to be 'fast' to enable the induction.
|
||||
FastMathFlags Flags;
|
||||
Flags.setUnsafeAlgebra();
|
||||
|
||||
Value *MulOp = Builder.CreateFMul(Cv, Step);
|
||||
if (isa<Instruction>(MulOp))
|
||||
// Have to check, MulOp may be a constant
|
||||
cast<Instruction>(MulOp)->setFastMathFlags(Flags);
|
||||
|
||||
Value *BOp = Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
|
||||
if (isa<Instruction>(BOp))
|
||||
cast<Instruction>(BOp)->setFastMathFlags(Flags);
|
||||
return BOp;
|
||||
}
|
||||
|
||||
void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
|
||||
|
@ -3099,8 +3132,10 @@ void InnerLoopVectorizer::createEmptyLoop() {
|
|||
EndValue = CountRoundDown;
|
||||
} else {
|
||||
IRBuilder<> B(LoopBypassBlocks.back()->getTerminator());
|
||||
Value *CRD = B.CreateSExtOrTrunc(CountRoundDown,
|
||||
II.getStep()->getType(), "cast.crd");
|
||||
Type *StepType = II.getStep()->getType();
|
||||
Instruction::CastOps CastOp =
|
||||
CastInst::getCastOpcode(CountRoundDown, true, StepType, true);
|
||||
Value *CRD = B.CreateCast(CastOp, CountRoundDown, StepType, "cast.crd");
|
||||
const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
|
||||
EndValue = II.transform(B, CRD, PSE.getSE(), DL);
|
||||
EndValue->setName("ind.end");
|
||||
|
@ -4047,7 +4082,7 @@ void InnerLoopVectorizer::widenPHIInstruction(
|
|||
llvm_unreachable("Unknown induction");
|
||||
case InductionDescriptor::IK_IntInduction:
|
||||
return widenIntInduction(P, Entry);
|
||||
case InductionDescriptor::IK_PtrInduction:
|
||||
case InductionDescriptor::IK_PtrInduction: {
|
||||
// Handle the pointer induction variable case.
|
||||
assert(P->getType()->isPointerTy() && "Unexpected type.");
|
||||
// This is the normalized GEP that starts counting at zero.
|
||||
|
@ -4080,6 +4115,29 @@ void InnerLoopVectorizer::widenPHIInstruction(
|
|||
}
|
||||
return;
|
||||
}
|
||||
case InductionDescriptor::IK_FpInduction: {
|
||||
assert(P->getType() == II.getStartValue()->getType() &&
|
||||
"Types must match");
|
||||
// Handle other induction variables that are now based on the
|
||||
// canonical one.
|
||||
assert(P != OldInduction && "Primary induction can be integer only");
|
||||
|
||||
Value *V = Builder.CreateCast(Instruction::SIToFP, Induction, P->getType());
|
||||
V = II.transform(Builder, V, PSE.getSE(), DL);
|
||||
V->setName("fp.offset.idx");
|
||||
|
||||
// Now we have scalar op: %fp.offset.idx = StartVal +/- Induction*StepVal
|
||||
|
||||
Value *Broadcasted = getBroadcastInstrs(V);
|
||||
// After broadcasting the induction variable we need to make the vector
|
||||
// consecutive by adding StepVal*0, StepVal*1, StepVal*2, etc.
|
||||
Value *StepVal = cast<SCEVUnknown>(II.getStep())->getValue();
|
||||
for (unsigned part = 0; part < UF; ++part)
|
||||
Entry[part] = getStepVector(Broadcasted, VF * part, StepVal,
|
||||
II.getInductionOpcode());
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
|
||||
|
@ -4565,10 +4623,12 @@ void LoopVectorizationLegality::addInductionPhi(
|
|||
const DataLayout &DL = Phi->getModule()->getDataLayout();
|
||||
|
||||
// Get the widest type.
|
||||
if (!WidestIndTy)
|
||||
WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
|
||||
else
|
||||
WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
|
||||
if (!PhiTy->isFloatingPointTy()) {
|
||||
if (!WidestIndTy)
|
||||
WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
|
||||
else
|
||||
WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
|
||||
}
|
||||
|
||||
// Int inductions are special because we only allow one IV.
|
||||
if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
|
||||
|
@ -4649,8 +4709,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
|
|||
}
|
||||
|
||||
InductionDescriptor ID;
|
||||
if (InductionDescriptor::isInductionPHI(Phi, PSE, ID)) {
|
||||
if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
|
||||
addInductionPhi(Phi, ID, AllowedExit);
|
||||
if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
|
||||
Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -4661,7 +4723,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
|
|||
|
||||
// As a last resort, coerce the PHI to a AddRec expression
|
||||
// and re-try classifying it as an induction PHI.
|
||||
if (InductionDescriptor::isInductionPHI(Phi, PSE, ID, true)) {
|
||||
if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
|
||||
addInductionPhi(Phi, ID, AllowedExit);
|
||||
continue;
|
||||
}
|
||||
|
@ -6348,11 +6410,20 @@ Value *InnerLoopUnroller::reverseVector(Value *Vec) { return Vec; }
|
|||
|
||||
Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
|
||||
|
||||
Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step) {
|
||||
Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step,
|
||||
Instruction::BinaryOps BinOp) {
|
||||
// When unrolling and the VF is 1, we only need to add a simple scalar.
|
||||
Type *ITy = Val->getType();
|
||||
assert(!ITy->isVectorTy() && "Val must be a scalar");
|
||||
Constant *C = ConstantInt::get(ITy, StartIdx);
|
||||
Type *Ty = Val->getType();
|
||||
assert(!Ty->isVectorTy() && "Val must be a scalar");
|
||||
|
||||
if (Ty->isFloatingPointTy()) {
|
||||
Constant *C = ConstantFP::get(Ty, (double)StartIdx);
|
||||
|
||||
// Floating point operations had to be 'fast' to enable the unrolling.
|
||||
Value *MulOp = addFastMathFlag(Builder.CreateFMul(C, Step));
|
||||
return addFastMathFlag(Builder.CreateBinOp(BinOp, Val, MulOp));
|
||||
}
|
||||
Constant *C = ConstantInt::get(Ty, StartIdx);
|
||||
return Builder.CreateAdd(Val, Builder.CreateMul(C, Step), "induction");
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,86 @@
|
|||
; RUN: opt < %s -O3 -mcpu=core-avx2 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix AUTO_VEC %s
|
||||
|
||||
; This test checks auto-vectorization with FP induction variable.
|
||||
; The FP operation is not "fast", so vectorization relies on the "no-nans-fp-math"="true" function attribute.
|
||||
|
||||
;void fp_iv_loop1(float * __restrict__ A, int N) {
|
||||
; float x = 1.0;
|
||||
; for (int i=0; i < N; ++i) {
|
||||
; A[i] = x;
|
||||
; x += 0.5;
|
||||
; }
|
||||
;}
|
||||
|
||||
|
||||
; AUTO_VEC-LABEL: @fp_iv_loop1(
|
||||
; AUTO_VEC: vector.body
|
||||
; AUTO_VEC: store <8 x float>
|
||||
|
||||
define void @fp_iv_loop1(float* noalias nocapture %A, i32 %N) #0 {
|
||||
entry:
|
||||
%cmp4 = icmp sgt i32 %N, 0
|
||||
br i1 %cmp4, label %for.body.preheader, label %for.end
|
||||
|
||||
for.body.preheader: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.preheader, %for.body
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
|
||||
%x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ]
|
||||
%arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
|
||||
store float %x.06, float* %arrayidx, align 4
|
||||
%conv1 = fadd float %x.06, 5.000000e-01
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %N
|
||||
br i1 %exitcond, label %for.end.loopexit, label %for.body
|
||||
|
||||
for.end.loopexit: ; preds = %for.body
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.end.loopexit, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
; The same as the previous test: the FP operation is not "fast", but here the function attribute is "no-nans-fp-math"="false".
|
||||
; Vectorization should be rejected.
|
||||
;void fp_iv_loop2(float * __restrict__ A, int N) {
|
||||
; float x = 1.0;
|
||||
; for (int i=0; i < N; ++i) {
|
||||
; A[i] = x;
|
||||
; x += 0.5;
|
||||
; }
|
||||
;}
|
||||
|
||||
; AUTO_VEC-LABEL: @fp_iv_loop2(
|
||||
; AUTO_VEC-NOT: vector.body
|
||||
; AUTO_VEC-NOT: store <{{.*}} x float>
|
||||
|
||||
define void @fp_iv_loop2(float* noalias nocapture %A, i32 %N) #1 {
|
||||
entry:
|
||||
%cmp4 = icmp sgt i32 %N, 0
|
||||
br i1 %cmp4, label %for.body.preheader, label %for.end
|
||||
|
||||
for.body.preheader: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.preheader, %for.body
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
|
||||
%x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ]
|
||||
%arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
|
||||
store float %x.06, float* %arrayidx, align 4
|
||||
%conv1 = fadd float %x.06, 5.000000e-01
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %N
|
||||
br i1 %exitcond, label %for.end.loopexit, label %for.body
|
||||
|
||||
for.end.loopexit: ; preds = %for.body
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.end.loopexit, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "no-nans-fp-math"="true" }
|
||||
attributes #1 = { "no-nans-fp-math"="false" }
|
|
@ -0,0 +1,218 @@
|
|||
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL1 %s
|
||||
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL2 %s
|
||||
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -dce -instcombine -S | FileCheck --check-prefix VEC1_INTERL2 %s
|
||||
|
||||
; VEC4_INTERL1-LABEL: @fp_iv_loop1(
|
||||
; VEC4_INTERL1: %[[FP_INC:.*]] = load float, float* @fp_inc
|
||||
; VEC4_INTERL1: vector.body:
|
||||
; VEC4_INTERL1: %[[FP_INDEX:.*]] = sitofp i64 {{.*}} to float
|
||||
; VEC4_INTERL1: %[[VEC_INCR:.*]] = fmul fast float {{.*}}, %[[FP_INDEX]]
|
||||
; VEC4_INTERL1: %[[FP_OFFSET_IDX:.*]] = fsub fast float %init, %[[VEC_INCR]]
|
||||
; VEC4_INTERL1: %[[BRCT_INSERT:.*]] = insertelement <4 x float> undef, float %[[FP_OFFSET_IDX]], i32 0
|
||||
; VEC4_INTERL1-NEXT: %[[BRCT_SPLAT:.*]] = shufflevector <4 x float> %[[BRCT_INSERT]], <4 x float> undef, <4 x i32> zeroinitializer
|
||||
; VEC4_INTERL1: %[[BRCT_INSERT:.*]] = insertelement {{.*}} %[[FP_INC]]
|
||||
; VEC4_INTERL1-NEXT: %[[FP_INC_BCST:.*]] = shufflevector <4 x float> %[[BRCT_INSERT]], {{.*}} zeroinitializer
|
||||
; VEC4_INTERL1: %[[VSTEP:.*]] = fmul fast <4 x float> %[[FP_INC_BCST]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
|
||||
; VEC4_INTERL1-NEXT: %[[VEC_INDUCTION:.*]] = fsub fast <4 x float> %[[BRCT_SPLAT]], %[[VSTEP]]
|
||||
; VEC4_INTERL1: store <4 x float> %[[VEC_INDUCTION]]
|
||||
|
||||
; VEC4_INTERL2-LABEL: @fp_iv_loop1(
|
||||
; VEC4_INTERL2: %[[FP_INC:.*]] = load float, float* @fp_inc
|
||||
; VEC4_INTERL2: vector.body:
|
||||
; VEC4_INTERL2: %[[INDEX:.*]] = sitofp i64 {{.*}} to float
|
||||
; VEC4_INTERL2: %[[VEC_INCR:.*]] = fmul fast float %{{.*}}, %[[INDEX]]
|
||||
; VEC4_INTERL2: fsub fast float %init, %[[VEC_INCR]]
|
||||
; VEC4_INTERL2: %[[VSTEP1:.*]] = fmul fast <4 x float> %{{.*}}, <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
|
||||
; VEC4_INTERL2-NEXT: %[[VEC_INDUCTION1:.*]] = fsub fast <4 x float> {{.*}}, %[[VSTEP1]]
|
||||
; VEC4_INTERL2: %[[VSTEP2:.*]] = fmul fast <4 x float> %{{.*}}, <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>
|
||||
; VEC4_INTERL2-NEXT: %[[VEC_INDUCTION2:.*]] = fsub fast <4 x float> {{.*}}, %[[VSTEP2]]
|
||||
; VEC4_INTERL2: store <4 x float> %[[VEC_INDUCTION1]]
|
||||
; VEC4_INTERL2: store <4 x float> %[[VEC_INDUCTION2]]
|
||||
|
||||
; VEC1_INTERL2-LABEL: @fp_iv_loop1(
|
||||
; VEC1_INTERL2: %[[FP_INC:.*]] = load float, float* @fp_inc
|
||||
; VEC1_INTERL2: vector.body:
|
||||
; VEC1_INTERL2: %[[INDEX:.*]] = sitofp i64 {{.*}} to float
|
||||
; VEC1_INTERL2: %[[STEP:.*]] = fmul fast float %{{.*}}, %[[INDEX]]
|
||||
; VEC1_INTERL2: %[[FP_OFFSET_IDX:.*]] = fsub fast float %init, %[[STEP]]
|
||||
; VEC1_INTERL2: %[[SCALAR_INDUCTION2:.*]] = fsub fast float %[[FP_OFFSET_IDX]], %[[FP_INC]]
|
||||
; VEC1_INTERL2: store float %[[FP_OFFSET_IDX]]
|
||||
; VEC1_INTERL2: store float %[[SCALAR_INDUCTION2]]
|
||||
|
||||
; Global read by @fp_iv_loop1 and @fp_iv_loop3 to obtain their floating-point
; step; each loads it once before entering the loop.
@fp_inc = common global float 0.000000e+00, align 4
|
||||
|
||||
;void fp_iv_loop1(float init, float * __restrict__ A, int N) {
|
||||
; float x = init;
|
||||
; for (int i=0; i < N; ++i) {
|
||||
; A[i] = x;
|
||||
; x -= fp_inc;
|
||||
; }
|
||||
;}
|
||||
|
||||
; Test input: a loop with a secondary floating-point induction variable whose
; step is loaded from @fp_inc before the loop and applied with 'fsub fast'
; (x -= fp_inc). The running value is stored to A[i] on every iteration.
; NOTE(review): attribute group #1 is not defined in this excerpt - confirm
; what it enables.
define void @fp_iv_loop1(float %init, float* noalias nocapture %A, i32 %N) #1 {
|
||||
entry:
|
||||
; Skip the loop entirely when N <= 0.
%cmp4 = icmp sgt i32 %N, 0
|
||||
br i1 %cmp4, label %for.body.lr.ph, label %for.end
|
||||
|
||||
for.body.lr.ph: ; preds = %entry
|
||||
; The step is loaded once here, outside for.body, so it does not change
; between iterations.
%fpinc = load float, float* @fp_inc, align 4
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %for.body.lr.ph
|
||||
; Integer index: starts at 0 and is incremented by 1 below.
%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
|
||||
; Floating-point induction: starts at %init, then takes %add from the
; previous iteration.
%x.05 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ]
|
||||
%arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
|
||||
; A[i] receives the value of x before it is updated.
store float %x.05, float* %arrayidx, align 4
|
||||
; Despite its name, %add is a subtraction: next x = x - fpinc.
%add = fsub fast float %x.05, %fpinc
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
; Leave the loop once the incremented index, truncated to i32, equals N.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %N
|
||||
br i1 %exitcond, label %for.end.loopexit, label %for.body
|
||||
|
||||
for.end.loopexit: ; preds = %for.body
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.end.loopexit, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
;void fp_iv_loop2(float init, float * __restrict__ A, int N) {
|
||||
; float x = init;
|
||||
; for (int i=0; i < N; ++i) {
|
||||
; A[i] = x;
|
||||
; x += 0.5;
|
||||
; }
|
||||
;}
|
||||
|
||||
; VEC4_INTERL1-LABEL: @fp_iv_loop2(
|
||||
; VEC4_INTERL1: vector.body
|
||||
; VEC4_INTERL1: %[[index:.*]] = phi i64 [ 0, %vector.ph ]
|
||||
; VEC4_INTERL1: sitofp i64 %[[index]] to float
|
||||
; VEC4_INTERL1: %[[VAR1:.*]] = fmul fast float {{.*}}, 5.000000e-01
|
||||
; VEC4_INTERL1: %[[VAR2:.*]] = fadd fast float %[[VAR1]]
|
||||
; VEC4_INTERL1: insertelement <4 x float> undef, float %[[VAR2]], i32 0
|
||||
; VEC4_INTERL1: shufflevector <4 x float> {{.*}}, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
; VEC4_INTERL1: fadd fast <4 x float> {{.*}}, <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00>
|
||||
; VEC4_INTERL1: store <4 x float>
|
||||
|
||||
; Test input: a loop with a floating-point induction variable that starts at
; the argument %init and advances by the constant 0.5 through 'fadd fast'.
; The running value is stored to A[i] on every iteration.
; NOTE(review): attribute group #0 is not defined in this excerpt - confirm
; what it enables.
define void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 {
|
||||
entry:
|
||||
; Skip the loop entirely when N <= 0.
%cmp4 = icmp sgt i32 %N, 0
|
||||
br i1 %cmp4, label %for.body.preheader, label %for.end
|
||||
|
||||
for.body.preheader: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.preheader, %for.body
|
||||
; Integer index: starts at 0 and is incremented by 1 below.
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
|
||||
; Floating-point induction: starts at %init, then takes %conv1 from the
; previous iteration.
%x.06 = phi float [ %conv1, %for.body ], [ %init, %for.body.preheader ]
|
||||
%arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
|
||||
; A[i] receives the value of x before it is updated.
store float %x.06, float* %arrayidx, align 4
|
||||
; Constant step: next x = x + 0.5.
%conv1 = fadd fast float %x.06, 5.000000e-01
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
; Leave the loop once the incremented index, truncated to i32, equals N.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %N
|
||||
br i1 %exitcond, label %for.end.loopexit, label %for.body
|
||||
|
||||
for.end.loopexit: ; preds = %for.body
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.end.loopexit, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
;void fp_iv_loop3(float init, float * __restrict__ A, float * __restrict__ B, float * __restrict__ C, int N) {
|
||||
; int i = 0;
|
||||
; float x = init;
|
||||
; float y = 0.1;
|
||||
; for (; i < N; ++i) {
|
||||
; A[i] = x;
|
||||
; x += fp_inc;
|
||||
; y -= 0.5;
|
||||
; B[i] = x + y;
|
||||
; C[i] = y;
|
||||
; }
|
||||
;}
|
||||
; VEC4_INTERL1-LABEL: @fp_iv_loop3(
|
||||
; VEC4_INTERL1: vector.body
|
||||
; VEC4_INTERL1: %[[index:.*]] = phi i64 [ 0, %vector.ph ]
|
||||
; VEC4_INTERL1: sitofp i64 %[[index]] to float
|
||||
; VEC4_INTERL1: %[[VAR1:.*]] = fmul fast float {{.*}}, -5.000000e-01
|
||||
; VEC4_INTERL1: fadd fast float %[[VAR1]]
|
||||
; VEC4_INTERL1: fadd fast <4 x float> {{.*}}, <float -5.000000e-01, float -1.000000e+00, float -1.500000e+00, float -2.000000e+00>
|
||||
; VEC4_INTERL1: store <4 x float>
|
||||
|
||||
; Test input: a loop carrying two floating-point induction variables at once.
;   x: starts at %init, advances by the value loaded from @fp_inc.
;   y: starts at the constant 0x3FB99999A0000000 (the "y = 0.1" of the C
;      source quoted above this function), advances by -0.5.
; Both updates are 'fadd fast'. The loop stores x to A[i] before updating it,
; then stores the updated x + y to B[i] and the updated y to C[i].
; NOTE(review): attribute group #1 is not defined in this excerpt - confirm
; what it enables.
define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalias nocapture %B, float* noalias nocapture %C, i32 %N) #1 {
|
||||
entry:
|
||||
; Skip the loop entirely when N <= 0.
%cmp9 = icmp sgt i32 %N, 0
|
||||
br i1 %cmp9, label %for.body.lr.ph, label %for.end
|
||||
|
||||
for.body.lr.ph: ; preds = %entry
|
||||
; The step for x is loaded once here, outside for.body.
%0 = load float, float* @fp_inc, align 4
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %for.body.lr.ph
|
||||
; Integer index: starts at 0 and is incremented by 1 below.
%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
|
||||
; y induction: constant start, then %conv1 from the previous iteration.
%y.012 = phi float [ 0x3FB99999A0000000, %for.body.lr.ph ], [ %conv1, %for.body ]
|
||||
; x induction: starts at %init, then %add from the previous iteration.
%x.011 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ]
|
||||
%arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
|
||||
; A[i] receives x before it is updated.
store float %x.011, float* %arrayidx, align 4
|
||||
; next x = x + (value loaded from @fp_inc).
%add = fadd fast float %x.011, %0
|
||||
; next y = y - 0.5, expressed as an add of -0.5.
%conv1 = fadd fast float %y.012, -5.000000e-01
|
||||
; B[i] receives the sum of the already-updated x and y.
%add2 = fadd fast float %conv1, %add
|
||||
%arrayidx4 = getelementptr inbounds float, float* %B, i64 %indvars.iv
|
||||
store float %add2, float* %arrayidx4, align 4
|
||||
%arrayidx6 = getelementptr inbounds float, float* %C, i64 %indvars.iv
|
||||
; C[i] receives the already-updated y.
store float %conv1, float* %arrayidx6, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
; Leave the loop once the incremented index, truncated to i32, equals N.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %N
|
||||
br i1 %exitcond, label %for.end.loopexit, label %for.body
|
||||
|
||||
for.end.loopexit: ; preds = %for.body
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.end.loopexit, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
; Start and step values are both constants, so no vector 'fmul' is expected in the vector loop body in this case.
|
||||
;void fp_iv_loop4(float * __restrict__ A, int N) {
|
||||
; float x = 1.0;
|
||||
; for (int i=0; i < N; ++i) {
|
||||
; A[i] = x;
|
||||
; x += 0.5;
|
||||
; }
|
||||
;}
|
||||
|
||||
; VEC4_INTERL1-LABEL: @fp_iv_loop4(
|
||||
; VEC4_INTERL1: vector.body
|
||||
; VEC4_INTERL1-NOT: fmul fast <4 x float>
|
||||
; VEC4_INTERL1: %[[induction:.*]] = fadd fast <4 x float> %{{.*}}, <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00>
|
||||
; VEC4_INTERL1: store <4 x float> %[[induction]]
|
||||
|
||||
; Test input: a loop with a floating-point induction variable whose start
; (1.0) and step (0.5) are both constants. Unlike the other functions in this
; file, the definition carries no attribute group; only the 'fast' flag on the
; 'fadd' update is present. The running value is stored to A[i] on every
; iteration.
define void @fp_iv_loop4(float* noalias nocapture %A, i32 %N) {
|
||||
entry:
|
||||
; Skip the loop entirely when N <= 0.
%cmp4 = icmp sgt i32 %N, 0
|
||||
br i1 %cmp4, label %for.body.preheader, label %for.end
|
||||
|
||||
for.body.preheader: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.preheader, %for.body
|
||||
; Integer index: starts at 0 and is incremented by 1 below.
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
|
||||
; Floating-point induction: starts at the constant 1.0, then takes %conv1
; from the previous iteration.
%x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ]
|
||||
%arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
|
||||
; A[i] receives the value of x before it is updated.
store float %x.06, float* %arrayidx, align 4
|
||||
; Constant step: next x = x + 0.5.
%conv1 = fadd fast float %x.06, 5.000000e-01
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
; Leave the loop once the incremented index, truncated to i32, equals N.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %N
|
||||
br i1 %exitcond, label %for.end.loopexit, label %for.body
|
||||
|
||||
for.end.loopexit: ; preds = %for.body
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.end.loopexit, %entry
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue