forked from OSchip/llvm-project
Reverted the previous commit due to an assertion failure.
llvm-svn: 273258
This commit is contained in:
parent
9823c995bc
commit
a266cf0518
|
@ -679,11 +679,9 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
|
|||
/// to \p PtrToStride and therefore add further predicates to \p PSE.
|
||||
/// The \p Assume parameter indicates if we are allowed to make additional
|
||||
/// run-time assumptions.
|
||||
/// The \p ShouldCheckWrap indicates that we should ensure that address
|
||||
/// calculation does not wrap.
|
||||
int getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
|
||||
const ValueToValueMap &StridesMap = ValueToValueMap(),
|
||||
bool Assume = false, bool ShouldCheckWrap = true);
|
||||
bool Assume = false);
|
||||
|
||||
/// \brief Returns true if the memory operations \p A and \p B are consecutive.
|
||||
/// This is a simple API that does not depend on the analysis pass.
|
||||
|
|
|
@ -866,7 +866,7 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
|
|||
/// \brief Check whether the access through \p Ptr has a constant stride.
|
||||
int llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
|
||||
const Loop *Lp, const ValueToValueMap &StridesMap,
|
||||
bool Assume, bool ShouldCheckWrap) {
|
||||
bool Assume) {
|
||||
Type *Ty = Ptr->getType();
|
||||
assert(Ty->isPointerTy() && "Unexpected non-ptr");
|
||||
|
||||
|
@ -905,9 +905,9 @@ int llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
|
|||
// to access the pointer value "0" which is undefined behavior in address
|
||||
// space 0, therefore we can also vectorize this case.
|
||||
bool IsInBoundsGEP = isInBoundsGep(Ptr);
|
||||
bool IsNoWrapAddRec = !ShouldCheckWrap ||
|
||||
PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
|
||||
isNoWrapAddRec(Ptr, AR, PSE, Lp);
|
||||
bool IsNoWrapAddRec =
|
||||
PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
|
||||
isNoWrapAddRec(Ptr, AR, PSE, Lp);
|
||||
bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
|
||||
if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
|
||||
if (Assume) {
|
||||
|
|
|
@ -2156,13 +2156,87 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
|
|||
}
|
||||
|
||||
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
|
||||
assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
|
||||
auto *SE = PSE.getSE();
|
||||
// Make sure that the pointer does not point to structs.
|
||||
if (Ptr->getType()->getPointerElementType()->isAggregateType())
|
||||
return 0;
|
||||
|
||||
const ValueToValueMap &Strides = getSymbolicStrides() ? *getSymbolicStrides() :
|
||||
ValueToValueMap();
|
||||
// If this value is a pointer induction variable, we know it is consecutive.
|
||||
PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
|
||||
if (Phi && Inductions.count(Phi)) {
|
||||
InductionDescriptor II = Inductions[Phi];
|
||||
return II.getConsecutiveDirection();
|
||||
}
|
||||
|
||||
GetElementPtrInst *Gep = getGEPInstruction(Ptr);
|
||||
if (!Gep)
|
||||
return 0;
|
||||
|
||||
unsigned NumOperands = Gep->getNumOperands();
|
||||
Value *GpPtr = Gep->getPointerOperand();
|
||||
// If this GEP value is a consecutive pointer induction variable and all of
|
||||
// the indices are constant, then we know it is consecutive.
|
||||
Phi = dyn_cast<PHINode>(GpPtr);
|
||||
if (Phi && Inductions.count(Phi)) {
|
||||
|
||||
// Make sure that the pointer does not point to structs.
|
||||
PointerType *GepPtrType = cast<PointerType>(GpPtr->getType());
|
||||
if (GepPtrType->getElementType()->isAggregateType())
|
||||
return 0;
|
||||
|
||||
// Make sure that all of the index operands are loop invariant.
|
||||
for (unsigned i = 1; i < NumOperands; ++i)
|
||||
if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
|
||||
return 0;
|
||||
|
||||
InductionDescriptor II = Inductions[Phi];
|
||||
return II.getConsecutiveDirection();
|
||||
}
|
||||
|
||||
unsigned InductionOperand = getGEPInductionOperand(Gep);
|
||||
|
||||
// Check that all of the gep indices are uniform except for our induction
|
||||
// operand.
|
||||
for (unsigned i = 0; i != NumOperands; ++i)
|
||||
if (i != InductionOperand &&
|
||||
!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
|
||||
return 0;
|
||||
|
||||
// We can emit wide load/stores only if the last non-zero index is the
|
||||
// induction variable.
|
||||
const SCEV *Last = nullptr;
|
||||
if (!getSymbolicStrides() || !getSymbolicStrides()->count(Gep))
|
||||
Last = PSE.getSCEV(Gep->getOperand(InductionOperand));
|
||||
else {
|
||||
// Because of the multiplication by a stride we can have a s/zext cast.
|
||||
// We are going to replace this stride by 1 so the cast is safe to ignore.
|
||||
//
|
||||
// %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
// %0 = trunc i64 %indvars.iv to i32
|
||||
// %mul = mul i32 %0, %Stride1
|
||||
// %idxprom = zext i32 %mul to i64 << Safe cast.
|
||||
// %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom
|
||||
//
|
||||
Last = replaceSymbolicStrideSCEV(PSE, *getSymbolicStrides(),
|
||||
Gep->getOperand(InductionOperand), Gep);
|
||||
if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last))
|
||||
Last =
|
||||
(C->getSCEVType() == scSignExtend || C->getSCEVType() == scZeroExtend)
|
||||
? C->getOperand()
|
||||
: Last;
|
||||
}
|
||||
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
|
||||
const SCEV *Step = AR->getStepRecurrence(*SE);
|
||||
|
||||
// The memory is consecutive because the last index is consecutive
|
||||
// and all other indices are loop invariant.
|
||||
if (Step->isOne())
|
||||
return 1;
|
||||
if (Step->isAllOnesValue())
|
||||
return -1;
|
||||
}
|
||||
|
||||
int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
|
||||
if (Stride == 1 || Stride == -1)
|
||||
return Stride;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -2543,8 +2617,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
|
|||
Ptr = Builder.Insert(Gep2);
|
||||
} else { // No GEP
|
||||
// Use the induction element ptr.
|
||||
assert(isa<SCEVAddRecExpr>(PSE.getSE()->getSCEV(Ptr)) &&
|
||||
"Invalid induction ptr");
|
||||
assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
|
||||
setDebugLocFromInst(Builder, Ptr);
|
||||
VectorParts &PtrVal = getVectorValue(Ptr);
|
||||
Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
|
||||
|
|
|
@ -1,43 +0,0 @@
|
|||
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -instcombine -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
;; Check consecutive memory access without preceding GEP instruction
|
||||
|
||||
; for (int i=0; i<len; i++) {
|
||||
; *to++ = *from++;
|
||||
; }
|
||||
|
||||
; CHECK-LABEL: @consecutive_no_gep(
|
||||
; CHECK: vector.body
|
||||
; CHECK: %[[index:.*]] = phi i64 [ 0, %vector.ph ]
|
||||
; CHECK: getelementptr float, float* %{{.*}}, i64 %[[index]]
|
||||
; CHECK: load <4 x float>
|
||||
|
||||
define void @consecutive_no_gep(float* noalias nocapture readonly %from, float* noalias nocapture %to, i32 %len) #0 {
|
||||
entry:
|
||||
%cmp2 = icmp sgt i32 %len, 0
|
||||
br i1 %cmp2, label %for.body.preheader, label %for.end
|
||||
|
||||
for.body.preheader: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.preheader, %for.body
|
||||
%i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
|
||||
%from.addr.04 = phi float* [ %incdec.ptr, %for.body ], [ %from, %for.body.preheader ]
|
||||
%to.addr.03 = phi float* [ %incdec.ptr1, %for.body ], [ %to, %for.body.preheader ]
|
||||
%incdec.ptr = getelementptr inbounds float, float* %from.addr.04, i64 1
|
||||
%val = load float, float* %from.addr.04, align 4
|
||||
%incdec.ptr1 = getelementptr inbounds float, float* %to.addr.03, i64 1
|
||||
store float %val, float* %to.addr.03, align 4
|
||||
%inc = add nsw i32 %i.05, 1
|
||||
%cmp = icmp slt i32 %inc, %len
|
||||
br i1 %cmp, label %for.body, label %for.end.loopexit
|
||||
|
||||
for.end.loopexit: ; preds = %for.body
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.end.loopexit, %entry
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue