forked from OSchip/llvm-project
Fix SCEVExpander: don't try to expand quadratic recurrences outside a loop.
Partial fix for PR17459: wrong code at -O3 on x86_64-linux-gnu (affecting trunk and 3.3) When SCEV expands a recurrence outside of a loop it attempts to scale by the stride of the recurrence. Chained recurrences don't work that way. We could compute binomial coefficients, but would hve to guarantee that the chained AddRec's are in a perfectly reduced form. llvm-svn: 193438
This commit is contained in:
parent
29abce3189
commit
57243da70f
|
@ -26,7 +26,7 @@ namespace llvm {
|
|||
|
||||
/// Return true if the given expression is safe to expand in the sense that
|
||||
/// all materialized values are safe to speculate.
|
||||
bool isSafeToExpand(const SCEV *S);
|
||||
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE);
|
||||
|
||||
/// SCEVExpander - This class uses information about analyze scalars to
|
||||
/// rewrite expressions in canonical form.
|
||||
|
|
|
@ -1233,6 +1233,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
|
|||
|
||||
// Re-apply any non-loop-dominating scale.
|
||||
if (PostLoopScale) {
|
||||
assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
|
||||
Result = InsertNoopCastOfTo(Result, IntTy);
|
||||
Result = Builder.CreateMul(Result,
|
||||
expandCodeFor(PostLoopScale, IntTy));
|
||||
|
@ -1704,28 +1705,43 @@ namespace {
|
|||
// Currently, we only allow division by a nonzero constant here. If this is
|
||||
// inadequate, we could easily allow division by SCEVUnknown by using
|
||||
// ValueTracking to check isKnownNonZero().
|
||||
//
|
||||
// We cannot generally expand recurrences unless the step dominates the loop
|
||||
// header. The expander handles the special case of affine recurrences by
|
||||
// scaling the recurrence outside the loop, but this technique isn't generally
|
||||
// applicable. Expanding a nested recurrence outside a loop requires computing
|
||||
// binomial coefficients. This could be done, but the recurrence has to be in a
|
||||
// perfectly reduced form, which can't be guaranteed.
|
||||
struct SCEVFindUnsafe {
|
||||
ScalarEvolution &SE;
|
||||
bool IsUnsafe;
|
||||
|
||||
SCEVFindUnsafe(): IsUnsafe(false) {}
|
||||
SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {}
|
||||
|
||||
bool follow(const SCEV *S) {
|
||||
const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S);
|
||||
if (!D)
|
||||
return true;
|
||||
const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
|
||||
if (SC && !SC->getValue()->isZero())
|
||||
return true;
|
||||
IsUnsafe = true;
|
||||
return false;
|
||||
if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
|
||||
const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
|
||||
if (!SC || SC->getValue()->isZero()) {
|
||||
IsUnsafe = true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
|
||||
const SCEV *Step = AR->getStepRecurrence(SE);
|
||||
if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) {
|
||||
IsUnsafe = true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
bool isDone() const { return IsUnsafe; }
|
||||
};
|
||||
}
|
||||
|
||||
namespace llvm {
|
||||
bool isSafeToExpand(const SCEV *S) {
|
||||
SCEVFindUnsafe Search;
|
||||
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
|
||||
SCEVFindUnsafe Search(SE);
|
||||
visitAll(S, Search);
|
||||
return !Search.IsUnsafe;
|
||||
}
|
||||
|
|
|
@ -532,7 +532,8 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
|
|||
// and varies predictably *inside* the loop. Evaluate the value it
|
||||
// contains when the loop exits, if possible.
|
||||
const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
|
||||
if (!SE->isLoopInvariant(ExitValue, L) || !isSafeToExpand(ExitValue))
|
||||
if (!SE->isLoopInvariant(ExitValue, L) ||
|
||||
!isSafeToExpand(ExitValue, *SE))
|
||||
continue;
|
||||
|
||||
// Computing the value outside of the loop brings no benefit if :
|
||||
|
|
|
@ -1170,6 +1170,13 @@ public:
|
|||
/// may be used.
|
||||
bool AllFixupsOutsideLoop;
|
||||
|
||||
/// RigidFormula is set to true to guarantee that this use will be associated
|
||||
/// with a single formula--the one that initially matched. Some SCEV
|
||||
/// expressions cannot be expanded. This allows LSR to consider the registers
|
||||
/// used by those expressions without the need to expand them later after
|
||||
/// changing the formula.
|
||||
bool RigidFormula;
|
||||
|
||||
/// WidestFixupType - This records the widest use type for any fixup using
|
||||
/// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
|
||||
/// max fixup widths to be equivalent, because the narrower one may be relying
|
||||
|
@ -1188,6 +1195,7 @@ public:
|
|||
MinOffset(INT64_MAX),
|
||||
MaxOffset(INT64_MIN),
|
||||
AllFixupsOutsideLoop(true),
|
||||
RigidFormula(false),
|
||||
WidestFixupType(0) {}
|
||||
|
||||
bool HasFormulaWithSameRegs(const Formula &F) const;
|
||||
|
@ -1214,6 +1222,9 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
|
|||
/// InsertFormula - If the given formula has not yet been inserted, add it to
|
||||
/// the list, and return true. Return false otherwise.
|
||||
bool LSRUse::InsertFormula(const Formula &F) {
|
||||
if (!Formulae.empty() && RigidFormula)
|
||||
return false;
|
||||
|
||||
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
|
||||
if (F.ScaledReg) Key.push_back(F.ScaledReg);
|
||||
// Unstable sort by host order ok, because this is only used for uniquifying.
|
||||
|
@ -1433,7 +1444,7 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
|
|||
}
|
||||
case LSRUse::ICmpZero:
|
||||
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg.
|
||||
// Therefore, return 0 in case F.Scale == -1.
|
||||
// Therefore, return 0 in case F.Scale == -1.
|
||||
return F.Scale != -1;
|
||||
|
||||
case LSRUse::Basic:
|
||||
|
@ -2943,7 +2954,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
|
|||
|
||||
// x == y --> x - y == 0
|
||||
const SCEV *N = SE.getSCEV(NV);
|
||||
if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) {
|
||||
if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
|
||||
// S is normalized, so normalize N before folding it into S
|
||||
// to keep the result normalized.
|
||||
N = TransformForPostIncUse(Normalize, N, CI, 0,
|
||||
|
@ -2986,6 +2997,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
|
|||
/// and loop-computable portions.
|
||||
void
|
||||
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
|
||||
// Mark uses whose expressions cannot be expanded.
|
||||
if (!isSafeToExpand(S, SE))
|
||||
LU.RigidFormula = true;
|
||||
|
||||
Formula F;
|
||||
F.InitialMatch(S, L, SE);
|
||||
bool Inserted = InsertFormula(LU, LUIdx, F);
|
||||
|
@ -4353,6 +4368,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
|
|||
SCEVExpander &Rewriter,
|
||||
SmallVectorImpl<WeakVH> &DeadInsts) const {
|
||||
const LSRUse &LU = Uses[LF.LUIdx];
|
||||
if (LU.RigidFormula)
|
||||
return LF.OperandValToReplace;
|
||||
|
||||
// Determine an input position which will be dominated by the operands and
|
||||
// which will dominate the result.
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
; RUN: opt -loop-reduce -S < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx"
|
||||
|
||||
; PR15470: LSR miscompile. The test2 function should return '1'.
|
||||
;
|
||||
; SCEV expander cannot expand quadratic recurrences outside of the
|
||||
; loop. This recurrence depends on %sub.us, so can't be expanded.
|
||||
;
|
||||
; CHECK-LABEL: @test2
|
||||
; CHECK-LABEL: test2.loop:
|
||||
; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -16777216, %entry ]
|
||||
; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 16777216
|
||||
;
|
||||
; CHECK=LABEL: for.end:
|
||||
; CHECK: %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
|
||||
; CHECK: %sext.us = mul i32 %lsr.iv.next, %sub.cond.us
|
||||
; CHECK: %f = ashr i32 %sext.us, 24
|
||||
; CHECK: ret i32 %f
|
||||
define i32 @test2() {
|
||||
entry:
|
||||
br label %test2.loop
|
||||
|
||||
test2.loop:
|
||||
%inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test2.loop ]
|
||||
%inc11.us = add nsw i32 %inc1115.us, 1
|
||||
%cmp.us = icmp slt i32 %inc11.us, 2
|
||||
br i1 %cmp.us, label %test2.loop, label %for.end
|
||||
|
||||
for.end:
|
||||
%tobool.us = icmp eq i32 %inc1115.us, 0
|
||||
%sub.us = select i1 %tobool.us, i32 0, i32 0
|
||||
%mul.us = shl i32 %inc1115.us, 24
|
||||
%sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
|
||||
%sext.us = mul i32 %mul.us, %sub.cond.us
|
||||
%f = ashr i32 %sext.us, 24
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret i32 %f
|
||||
}
|
Loading…
Reference in New Issue