[LAA] Support memchecks involving loop-invariant addresses

We used to only allow SCEVAddRecExpr for pointer expressions in order to
be able to compute the bounds.  However this is also trivially possible
for loop-invariant addresses (scUnknown) since then the bounds are the
address itself.

Interestingly, we used allow this for the special case when the
loop-invariant address happens to also be an SCEVAddRecExpr (in an outer
loop).

There are a couple more loops that are vectorized in SPEC after this.
My guess is that the main reason we don't see more because for example a
loop-invariant load is vectorized into a splat vector with several
vector-inserts.  This is likely to make the vectorization unprofitable.
I.e. we don't notice that a later LICM will move all of this out of the
loop so the cost estimate should really be 0.

llvm-svn: 264243
This commit is contained in:
Adam Nemet 2016-03-24 04:28:47 +00:00
parent 972bea8a2e
commit 279784ffc4
2 changed files with 69 additions and 17 deletions

View File

@ -130,26 +130,35 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
PredicatedScalarEvolution &PSE) {
// Get the stride replaced scev.
const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
assert(AR && "Invalid addrec expression");
ScalarEvolution *SE = PSE.getSE();
const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
const SCEV *ScStart = AR->getStart();
const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
const SCEV *Step = AR->getStepRecurrence(*SE);
const SCEV *ScStart;
const SCEV *ScEnd;
// For expressions with negative step, the upper bound is ScStart and the
// lower bound is ScEnd.
if (const SCEVConstant *CStep = dyn_cast<const SCEVConstant>(Step)) {
if (CStep->getValue()->isNegative())
std::swap(ScStart, ScEnd);
} else {
// Fallback case: the step is not constant, but the we can still
// get the upper and lower bounds of the interval by using min/max
// expressions.
ScStart = SE->getUMinExpr(ScStart, ScEnd);
ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
if (SE->isLoopInvariant(Sc, Lp)) {
ScStart = ScEnd = Sc;
}
else {
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
assert(AR && "Invalid addrec expression");
const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
ScStart = AR->getStart();
ScEnd = AR->evaluateAtIteration(Ex, *SE);
const SCEV *Step = AR->getStepRecurrence(*SE);
// For expressions with negative step, the upper bound is ScStart and the
// lower bound is ScEnd.
if (const SCEVConstant *CStep = dyn_cast<const SCEVConstant>(Step)) {
if (CStep->getValue()->isNegative())
std::swap(ScStart, ScEnd);
} else {
// Fallback case: the step is not constant, but the we can still
// get the upper and lower bounds of the interval by using min/max
// expressions.
ScStart = SE->getUMinExpr(ScStart, ScEnd);
ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
}
}
Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
@ -524,6 +533,11 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
const ValueToValueMap &Strides, Value *Ptr,
Loop *L) {
const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
// The bounds for loop-invariant pointer is trivial.
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
return true;
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (!AR)
return false;

View File

@ -0,0 +1,38 @@
; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
; Handle memchecks involving loop-invariant addresses:
;
; extern int *A, *b;
; for (i = 0; i < N; ++i) {
; A[i] = b;
; }
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; CHECK: Memory dependences are safe with run-time checks
; CHECK: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group ({{.*}}):
; CHECK-NEXT: %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
; CHECK-NEXT: Against group ({{.*}}):
; CHECK-NEXT: i32* %b
define void @f(i32* %a, i32* %b) {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
%loadB = load i32, i32* %b, align 4
store i32 %loadB, i32* %arrayidxA, align 4
%inc = add nuw nsw i64 %ind, 1
%exitcond = icmp eq i64 %inc, 20
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}