forked from OSchip/llvm-project
[SCEV] Properly solve quadratic equations
Differential Revision: https://reviews.llvm.org/D48283 llvm-svn: 338758
This commit is contained in:
parent
821649229e
commit
90f3249ce2
|
@ -31,6 +31,7 @@ class raw_ostream;
|
|||
|
||||
template <typename T> class SmallVectorImpl;
|
||||
template <typename T> class ArrayRef;
|
||||
template <typename T> class Optional;
|
||||
|
||||
class APInt;
|
||||
|
||||
|
@ -2166,6 +2167,41 @@ APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
|
|||
/// Return A sign-divided by B, rounded by the given rounding mode.
|
||||
APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
|
||||
|
||||
/// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range
|
||||
/// (e.g. 32 for i32).
|
||||
/// This function finds the smallest number n, such that
|
||||
/// (a) n >= 0 and q(n) = 0, or
|
||||
/// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all
|
||||
/// integers, belong to two different intervals [Rk, Rk+R),
|
||||
/// where R = 2^BW, and k is an integer.
|
||||
/// The idea here is to find when q(n) "overflows" 2^BW, while at the
|
||||
/// same time "allowing" subtraction. In unsigned modulo arithmetic a
|
||||
/// subtraction (treated as addition of negated numbers) would always
|
||||
/// count as an overflow, but here we want to allow values to decrease
|
||||
/// and increase as long as they are within the same interval.
|
||||
/// Specifically, adding of two negative numbers should not cause an
|
||||
/// overflow (as long as the magnitude does not exceed the bith width).
|
||||
/// On the other hand, given a positive number, adding a negative
|
||||
/// number to it can give a negative result, which would cause the
|
||||
/// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is
|
||||
/// treated as a special case of an overflow.
|
||||
///
|
||||
/// This function returns None if after finding k that minimizes the
|
||||
/// positive solution to q(n) = kR, both solutions are contained between
|
||||
/// two consecutive integers.
|
||||
///
|
||||
/// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation
|
||||
/// in arithmetic modulo 2^BW, and treating the values as signed) by the
|
||||
/// virtue of *signed* overflow. This function will *not* find such an n,
|
||||
/// however it may find a value of n satisfying the inequalities due to
|
||||
/// an *unsigned* overflow (if the values are treated as unsigned).
|
||||
/// To find a solution for a signed overflow, treat it as a problem of
|
||||
/// finding an unsigned overflow with a range with of BW-1.
|
||||
///
|
||||
/// The returned value may have a different bit width from the input
|
||||
/// coefficients.
|
||||
Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
|
||||
unsigned RangeWidth);
|
||||
} // End of APIntOps namespace
|
||||
|
||||
// See friend declaration above. This additional declaration is required in
|
||||
|
|
|
@ -8344,69 +8344,273 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B,
|
|||
return SE.getUDivExactExpr(SE.getMulExpr(B, SE.getConstant(I)), D);
|
||||
}
|
||||
|
||||
/// Find the roots of the quadratic equation for the given quadratic chrec
|
||||
/// {L,+,M,+,N}. This returns either the two roots (which might be the same) or
|
||||
/// two SCEVCouldNotCompute objects.
|
||||
static Optional<std::pair<const SCEVConstant *,const SCEVConstant *>>
|
||||
SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
|
||||
/// For a given quadratic addrec, generate coefficients of the corresponding
|
||||
/// quadratic equation, multiplied by a common value to ensure that they are
|
||||
/// integers.
|
||||
/// The returned value is a tuple { A, B, C, M, BitWidth }, where
|
||||
/// Ax^2 + Bx + C is the quadratic function, M is the value that A, B and C
|
||||
/// were multiplied by, and BitWidth is the bit width of the original addrec
|
||||
/// coefficients.
|
||||
/// This function returns None if the addrec coefficients are not compile-
|
||||
/// time constants.
|
||||
static Optional<std::tuple<APInt, APInt, APInt, APInt, unsigned>>
|
||||
GetQuadraticEquation(const SCEVAddRecExpr *AddRec) {
|
||||
assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
|
||||
const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
|
||||
const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
|
||||
const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
|
||||
LLVM_DEBUG(dbgs() << __func__ << ": analyzing quadratic addrec: "
|
||||
<< *AddRec << '\n');
|
||||
|
||||
// We currently can only solve this if the coefficients are constants.
|
||||
if (!LC || !MC || !NC)
|
||||
return None;
|
||||
|
||||
uint32_t BitWidth = LC->getAPInt().getBitWidth();
|
||||
const APInt &L = LC->getAPInt();
|
||||
const APInt &M = MC->getAPInt();
|
||||
const APInt &N = NC->getAPInt();
|
||||
APInt Two(BitWidth, 2);
|
||||
|
||||
// Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
|
||||
|
||||
// The A coefficient is N/2
|
||||
APInt A = N.sdiv(Two);
|
||||
|
||||
// The B coefficient is M-N/2
|
||||
APInt B = M;
|
||||
B -= A; // A is the same as N/2.
|
||||
|
||||
// The C coefficient is L.
|
||||
const APInt& C = L;
|
||||
|
||||
// Compute the B^2-4ac term.
|
||||
APInt SqrtTerm = B;
|
||||
SqrtTerm *= B;
|
||||
SqrtTerm -= 4 * (A * C);
|
||||
|
||||
if (SqrtTerm.isNegative()) {
|
||||
// The loop is provably infinite.
|
||||
if (!LC || !MC || !NC) {
|
||||
LLVM_DEBUG(dbgs() << __func__ << ": coefficients are not constant\n");
|
||||
return None;
|
||||
}
|
||||
|
||||
// Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
|
||||
// integer value or else APInt::sqrt() will assert.
|
||||
APInt SqrtVal = SqrtTerm.sqrt();
|
||||
APInt L = LC->getAPInt();
|
||||
APInt M = MC->getAPInt();
|
||||
APInt N = NC->getAPInt();
|
||||
assert(!N.isNullValue() && "This is not a quadratic addrec");
|
||||
|
||||
// Compute the two solutions for the quadratic formula.
|
||||
// The divisions must be performed as signed divisions.
|
||||
APInt NegB = -std::move(B);
|
||||
APInt TwoA = std::move(A);
|
||||
TwoA <<= 1;
|
||||
if (TwoA.isNullValue())
|
||||
unsigned BitWidth = LC->getAPInt().getBitWidth();
|
||||
unsigned NewWidth = BitWidth + 1;
|
||||
LLVM_DEBUG(dbgs() << __func__ << ": addrec coeff bw: "
|
||||
<< BitWidth << '\n');
|
||||
// The sign-extension (as opposed to a zero-extension) here matches the
|
||||
// extension used in SolveQuadraticEquationWrap (with the same motivation).
|
||||
N = N.sext(NewWidth);
|
||||
M = M.sext(NewWidth);
|
||||
L = L.sext(NewWidth);
|
||||
|
||||
// The increments are M, M+N, M+2N, ..., so the accumulated values are
|
||||
// L+M, (L+M)+(M+N), (L+M)+(M+N)+(M+2N), ..., that is,
|
||||
// L+M, L+2M+N, L+3M+3N, ...
|
||||
// After n iterations the accumulated value Acc is L + nM + n(n-1)/2 N.
|
||||
//
|
||||
// The equation Acc = 0 is then
|
||||
// L + nM + n(n-1)/2 N = 0, or 2L + 2M n + n(n-1) N = 0.
|
||||
// In a quadratic form it becomes:
|
||||
// N n^2 + (2M-N) n + 2L = 0.
|
||||
|
||||
APInt A = N;
|
||||
APInt B = 2 * M - A;
|
||||
APInt C = 2 * L;
|
||||
APInt T = APInt(NewWidth, 2);
|
||||
LLVM_DEBUG(dbgs() << __func__ << ": equation " << A << "x^2 + " << B
|
||||
<< "x + " << C << ", coeff bw: " << NewWidth
|
||||
<< ", multiplied by " << T << '\n');
|
||||
return std::make_tuple(A, B, C, T, BitWidth);
|
||||
}
|
||||
|
||||
/// Helper function to compare optional APInts:
|
||||
/// (a) if X and Y both exist, return min(X, Y),
|
||||
/// (b) if neither X nor Y exist, return None,
|
||||
/// (c) if exactly one of X and Y exists, return that value.
|
||||
static Optional<APInt> MinOptional(Optional<APInt> X, Optional<APInt> Y) {
|
||||
if (X.hasValue() && Y.hasValue()) {
|
||||
unsigned W = std::max(X->getBitWidth(), Y->getBitWidth());
|
||||
APInt XW = X->sextOrSelf(W);
|
||||
APInt YW = Y->sextOrSelf(W);
|
||||
return XW.slt(YW) ? *X : *Y;
|
||||
}
|
||||
if (!X.hasValue() && !Y.hasValue())
|
||||
return None;
|
||||
return X.hasValue() ? *X : *Y;
|
||||
}
|
||||
|
||||
/// Helper function to truncate an optional APInt to a given BitWidth.
|
||||
/// When solving addrec-related equations, it is preferable to return a value
|
||||
/// that has the same bit width as the original addrec's coefficients. If the
|
||||
/// solution fits in the original bit width, truncate it (except for i1).
|
||||
/// Returning a value of a different bit width may inhibit some optimizations.
|
||||
///
|
||||
/// In general, a solution to a quadratic equation generated from an addrec
|
||||
/// may require BW+1 bits, where BW is the bit width of the addrec's
|
||||
/// coefficients. The reason is that the coefficients of the quadratic
|
||||
/// equation are BW+1 bits wide (to avoid truncation when converting from
|
||||
/// the addrec to the equation).
|
||||
static Optional<APInt> TruncIfPossible(Optional<APInt> X, unsigned BitWidth) {
|
||||
if (!X.hasValue())
|
||||
return None;
|
||||
unsigned W = X->getBitWidth();
|
||||
if (BitWidth > 1 && BitWidth < W && X->isIntN(BitWidth))
|
||||
return X->trunc(BitWidth);
|
||||
return X;
|
||||
}
|
||||
|
||||
/// Let c(n) be the value of the quadratic chrec {L,+,M,+,N} after n
|
||||
/// iterations. The values L, M, N are assumed to be signed, and they
|
||||
/// should all have the same bit widths.
|
||||
/// Find the least n >= 0 such that c(n) = 0 in the arithmetic modulo 2^BW,
|
||||
/// where BW is the bit width of the addrec's coefficients.
|
||||
/// If the calculated value is a BW-bit integer (for BW > 1), it will be
|
||||
/// returned as such, otherwise the bit width of the returned value may
|
||||
/// be greater than BW.
|
||||
///
|
||||
/// This function returns None if
|
||||
/// (a) the addrec coefficients are not constant, or
|
||||
/// (b) SolveQuadraticEquationWrap was unable to find a solution. For cases
|
||||
/// like x^2 = 5, no integer solutions exist, in other cases an integer
|
||||
/// solution may exist, but SolveQuadraticEquationWrap may fail to find it.
|
||||
static Optional<APInt>
|
||||
SolveQuadraticAddRecExact(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
|
||||
APInt A, B, C, M;
|
||||
unsigned BitWidth;
|
||||
auto T = GetQuadraticEquation(AddRec);
|
||||
if (!T.hasValue())
|
||||
return None;
|
||||
|
||||
LLVMContext &Context = SE.getContext();
|
||||
std::tie(A, B, C, M, BitWidth) = *T;
|
||||
LLVM_DEBUG(dbgs() << __func__ << ": solving for unsigned overflow\n");
|
||||
Optional<APInt> X = APIntOps::SolveQuadraticEquationWrap(A, B, C, BitWidth+1);
|
||||
if (!X.hasValue())
|
||||
return None;
|
||||
|
||||
ConstantInt *Solution1 =
|
||||
ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
|
||||
ConstantInt *Solution2 =
|
||||
ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
|
||||
ConstantInt *CX = ConstantInt::get(SE.getContext(), *X);
|
||||
ConstantInt *V = EvaluateConstantChrecAtConstant(AddRec, CX, SE);
|
||||
if (!V->isZero())
|
||||
return None;
|
||||
|
||||
return std::make_pair(cast<SCEVConstant>(SE.getConstant(Solution1)),
|
||||
cast<SCEVConstant>(SE.getConstant(Solution2)));
|
||||
return TruncIfPossible(X, BitWidth);
|
||||
}
|
||||
|
||||
/// Let c(n) be the value of the quadratic chrec {0,+,M,+,N} after n
|
||||
/// iterations. The values M, N are assumed to be signed, and they
|
||||
/// should all have the same bit widths.
|
||||
/// Find the least n such that c(n) does not belong to the given range,
|
||||
/// while c(n-1) does.
|
||||
///
|
||||
/// This function returns None if
|
||||
/// (a) the addrec coefficients are not constant, or
|
||||
/// (b) SolveQuadraticEquationWrap was unable to find a solution for the
|
||||
/// bounds of the range.
|
||||
static Optional<APInt>
|
||||
SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec,
|
||||
const ConstantRange &Range, ScalarEvolution &SE) {
|
||||
assert(AddRec->getOperand(0)->isZero() &&
|
||||
"Starting value of addrec should be 0");
|
||||
LLVM_DEBUG(dbgs() << __func__ << ": solving boundary crossing for range "
|
||||
<< Range << ", addrec " << *AddRec << '\n');
|
||||
// This case is handled in getNumIterationsInRange. Here we can assume that
|
||||
// we start in the range.
|
||||
assert(Range.contains(APInt(SE.getTypeSizeInBits(AddRec->getType()), 0)) &&
|
||||
"Addrec's initial value should be in range");
|
||||
|
||||
APInt A, B, C, M;
|
||||
unsigned BitWidth;
|
||||
auto T = GetQuadraticEquation(AddRec);
|
||||
if (!T.hasValue())
|
||||
return None;
|
||||
|
||||
// Be careful about the return value: there can be two reasons for not
|
||||
// returning an actual number. First, if no solutions to the equations
|
||||
// were found, and second, if the solutions don't leave the given range.
|
||||
// The first case means that the actual solution is "unknown", the second
|
||||
// means that it's known, but not valid. If the solution is unknown, we
|
||||
// cannot make any conclusions.
|
||||
// Return a pair: the optional solution and a flag indicating if the
|
||||
// solution was found.
|
||||
auto SolveForBoundary = [&](APInt Bound) -> std::pair<Optional<APInt>,bool> {
|
||||
// Solve for signed overflow and unsigned overflow, pick the lower
|
||||
// solution.
|
||||
LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: checking boundary "
|
||||
<< Bound << " (before multiplying by " << M << ")\n");
|
||||
Bound *= M; // The quadratic equation multiplier.
|
||||
|
||||
Optional<APInt> SO = None;
|
||||
if (BitWidth > 1) {
|
||||
LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for "
|
||||
"signed overflow\n");
|
||||
SO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound, BitWidth);
|
||||
}
|
||||
LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for "
|
||||
"unsigned overflow\n");
|
||||
Optional<APInt> UO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound,
|
||||
BitWidth+1);
|
||||
|
||||
auto LeavesRange = [&] (const APInt &X) {
|
||||
ConstantInt *C0 = ConstantInt::get(SE.getContext(), X);
|
||||
ConstantInt *V0 = EvaluateConstantChrecAtConstant(AddRec, C0, SE);
|
||||
if (Range.contains(V0->getValue()))
|
||||
return false;
|
||||
// X should be at least 1, so X-1 is non-negative.
|
||||
ConstantInt *C1 = ConstantInt::get(SE.getContext(), X-1);
|
||||
ConstantInt *V1 = EvaluateConstantChrecAtConstant(AddRec, C1, SE);
|
||||
if (Range.contains(V1->getValue()))
|
||||
return true;
|
||||
return false;
|
||||
};
|
||||
|
||||
// If SolveQuadraticEquationWrap returns None, it means that there can
|
||||
// be a solution, but the function failed to find it. We cannot treat it
|
||||
// as "no solution".
|
||||
if (!SO.hasValue() || !UO.hasValue())
|
||||
return { None, false };
|
||||
|
||||
// Check the smaller value first to see if it leaves the range.
|
||||
// At this point, both SO and UO must have values.
|
||||
Optional<APInt> Min = MinOptional(SO, UO);
|
||||
if (LeavesRange(*Min))
|
||||
return { Min, true };
|
||||
Optional<APInt> Max = Min == SO ? UO : SO;
|
||||
if (LeavesRange(*Max))
|
||||
return { Max, true };
|
||||
|
||||
// Solutions were found, but were eliminated, hence the "true".
|
||||
return { None, true };
|
||||
};
|
||||
|
||||
std::tie(A, B, C, M, BitWidth) = *T;
|
||||
// Lower bound is inclusive, subtract 1 to represent the exiting value.
|
||||
APInt Lower = Range.getLower().sextOrSelf(A.getBitWidth()) - 1;
|
||||
APInt Upper = Range.getUpper().sextOrSelf(A.getBitWidth());
|
||||
auto SL = SolveForBoundary(Lower);
|
||||
auto SU = SolveForBoundary(Upper);
|
||||
// If any of the solutions was unknown, no meaninigful conclusions can
|
||||
// be made.
|
||||
if (!SL.second || !SU.second)
|
||||
return None;
|
||||
|
||||
// Claim: The correct solution is not some value between Min and Max.
|
||||
//
|
||||
// Justification: Assuming that Min and Max are different values, one of
|
||||
// them is when the first signed overflow happens, the other is when the
|
||||
// first unsigned overflow happens. Crossing the range boundary is only
|
||||
// possible via an overflow (treating 0 as a special case of it, modeling
|
||||
// an overflow as crossing k*2^W for some k).
|
||||
//
|
||||
// The interesting case here is when Min was eliminated as an invalid
|
||||
// solution, but Max was not. The argument is that if there was another
|
||||
// overflow between Min and Max, it would also have been eliminated if
|
||||
// it was considered.
|
||||
//
|
||||
// For a given boundary, it is possible to have two overflows of the same
|
||||
// type (signed/unsigned) without having the other type in between: this
|
||||
// can happen when the vertex of the parabola is between the iterations
|
||||
// corresponding to the overflows. This is only possible when the two
|
||||
// overflows cross k*2^W for the same k. In such case, if the second one
|
||||
// left the range (and was the first one to do so), the first overflow
|
||||
// would have to enter the range, which would mean that either we had left
|
||||
// the range before or that we started outside of it. Both of these cases
|
||||
// are contradictions.
|
||||
//
|
||||
// Claim: In the case where SolveForBoundary returns None, the correct
|
||||
// solution is not some value between the Max for this boundary and the
|
||||
// Min of the other boundary.
|
||||
//
|
||||
// Justification: Assume that we had such Max_A and Min_B corresponding
|
||||
// to range boundaries A and B and such that Max_A < Min_B. If there was
|
||||
// a solution between Max_A and Min_B, it would have to be caused by an
|
||||
// overflow corresponding to either A or B. It cannot correspond to B,
|
||||
// since Min_B is the first occurrence of such an overflow. If it
|
||||
// corresponded to A, it would have to be either a signed or an unsigned
|
||||
// overflow that is larger than both eliminated overflows for A. But
|
||||
// between the eliminated overflows and this overflow, the values would
|
||||
// cover the entire value space, thus crossing the other boundary, which
|
||||
// is a contradiction.
|
||||
|
||||
return TruncIfPossible(MinOptional(SL.first, SU.first), BitWidth);
|
||||
}
|
||||
|
||||
ScalarEvolution::ExitLimit
|
||||
|
@ -8441,23 +8645,12 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
|
|||
// If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
|
||||
// the quadratic equation to solve it.
|
||||
if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
|
||||
if (auto Roots = SolveQuadraticEquation(AddRec, *this)) {
|
||||
const SCEVConstant *R1 = Roots->first;
|
||||
const SCEVConstant *R2 = Roots->second;
|
||||
// Pick the smallest positive root value.
|
||||
if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(
|
||||
CmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
|
||||
if (!CB->getZExtValue())
|
||||
std::swap(R1, R2); // R1 is the minimum root now.
|
||||
|
||||
// We can only use this value if the chrec ends up with an exact zero
|
||||
// value at this index. When solving for "X*X != 5", for example, we
|
||||
// should not accept a root of 2.
|
||||
const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
|
||||
if (Val->isZero())
|
||||
// We found a quadratic root!
|
||||
return ExitLimit(R1, R1, false, Predicates);
|
||||
}
|
||||
// We can only use this value if the chrec ends up with an exact zero
|
||||
// value at this index. When solving for "X*X != 5", for example, we
|
||||
// should not accept a root of 2.
|
||||
if (auto S = SolveQuadraticAddRecExact(AddRec, *this)) {
|
||||
const auto *R = cast<SCEVConstant>(getConstant(S.getValue()));
|
||||
return ExitLimit(R, R, false, Predicates);
|
||||
}
|
||||
return getCouldNotCompute();
|
||||
}
|
||||
|
@ -10565,52 +10758,11 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
|
|||
ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) &&
|
||||
"Linear scev computation is off in a bad way!");
|
||||
return SE.getConstant(ExitValue);
|
||||
} else if (isQuadratic()) {
|
||||
// If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the
|
||||
// quadratic equation to solve it. To do this, we must frame our problem in
|
||||
// terms of figuring out when zero is crossed, instead of when
|
||||
// Range.getUpper() is crossed.
|
||||
SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
|
||||
NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
|
||||
const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(), FlagAnyWrap);
|
||||
}
|
||||
|
||||
// Next, solve the constructed addrec
|
||||
if (auto Roots =
|
||||
SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE)) {
|
||||
const SCEVConstant *R1 = Roots->first;
|
||||
const SCEVConstant *R2 = Roots->second;
|
||||
// Pick the smallest positive root value.
|
||||
if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(
|
||||
ICmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
|
||||
if (!CB->getZExtValue())
|
||||
std::swap(R1, R2); // R1 is the minimum root now.
|
||||
|
||||
// Make sure the root is not off by one. The returned iteration should
|
||||
// not be in the range, but the previous one should be. When solving
|
||||
// for "X*X < 5", for example, we should not return a root of 2.
|
||||
ConstantInt *R1Val =
|
||||
EvaluateConstantChrecAtConstant(this, R1->getValue(), SE);
|
||||
if (Range.contains(R1Val->getValue())) {
|
||||
// The next iteration must be out of the range...
|
||||
ConstantInt *NextVal =
|
||||
ConstantInt::get(SE.getContext(), R1->getAPInt() + 1);
|
||||
|
||||
R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
|
||||
if (!Range.contains(R1Val->getValue()))
|
||||
return SE.getConstant(NextVal);
|
||||
return SE.getCouldNotCompute(); // Something strange happened
|
||||
}
|
||||
|
||||
// If R1 was not in the range, then it is a good return value. Make
|
||||
// sure that R1-1 WAS in the range though, just in case.
|
||||
ConstantInt *NextVal =
|
||||
ConstantInt::get(SE.getContext(), R1->getAPInt() - 1);
|
||||
R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
|
||||
if (Range.contains(R1Val->getValue()))
|
||||
return R1;
|
||||
return SE.getCouldNotCompute(); // Something strange happened
|
||||
}
|
||||
}
|
||||
if (isQuadratic()) {
|
||||
if (auto S = SolveQuadraticAddRecRange(this, Range, SE))
|
||||
return SE.getConstant(S.getValue());
|
||||
}
|
||||
|
||||
return SE.getCouldNotCompute();
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/FoldingSet.h"
|
||||
#include "llvm/ADT/Hashing.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Config/llvm-config.h"
|
||||
|
@ -2707,3 +2708,193 @@ APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
|
|||
}
|
||||
llvm_unreachable("Unknown APInt::Rounding enum");
|
||||
}
|
||||
|
||||
Optional<APInt>
|
||||
llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
|
||||
unsigned RangeWidth) {
|
||||
unsigned CoeffWidth = A.getBitWidth();
|
||||
assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
|
||||
assert(RangeWidth <= CoeffWidth &&
|
||||
"Value range width should be less than coefficient width");
|
||||
assert(RangeWidth > 1 && "Value range bit width should be > 1");
|
||||
|
||||
LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
|
||||
<< "x + " << C << ", rw:" << RangeWidth << '\n');
|
||||
|
||||
// Identify 0 as a (non)solution immediately.
|
||||
if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
|
||||
LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
|
||||
return APInt(CoeffWidth, 0);
|
||||
}
|
||||
|
||||
// The result of APInt arithmetic has the same bit width as the operands,
|
||||
// so it can actually lose high bits. A product of two n-bit integers needs
|
||||
// 2n-1 bits to represent the full value.
|
||||
// The operation done below (on quadratic coefficients) that can produce
|
||||
// the largest value is the evaluation of the equation during bisection,
|
||||
// which needs 3 times the bitwidth of the coefficient, so the total number
|
||||
// of required bits is 3n.
|
||||
//
|
||||
// The purpose of this extension is to simulate the set Z of all integers,
|
||||
// where n+1 > n for all n in Z. In Z it makes sense to talk about positive
|
||||
// and negative numbers (not so much in a modulo arithmetic). The method
|
||||
// used to solve the equation is based on the standard formula for real
|
||||
// numbers, and uses the concepts of "positive" and "negative" with their
|
||||
// usual meanings.
|
||||
CoeffWidth *= 3;
|
||||
A = A.sext(CoeffWidth);
|
||||
B = B.sext(CoeffWidth);
|
||||
C = C.sext(CoeffWidth);
|
||||
|
||||
// Make A > 0 for simplicity. Negate cannot overflow at this point because
|
||||
// the bit width has increased.
|
||||
if (A.isNegative()) {
|
||||
A.negate();
|
||||
B.negate();
|
||||
C.negate();
|
||||
}
|
||||
|
||||
// Solving an equation q(x) = 0 with coefficients in modular arithmetic
|
||||
// is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
|
||||
// and R = 2^BitWidth.
|
||||
// Since we're trying not only to find exact solutions, but also values
|
||||
// that "wrap around", such a set will always have a solution, i.e. an x
|
||||
// that satisfies at least one of the equations, or such that |q(x)|
|
||||
// exceeds kR, while |q(x-1)| for the same k does not.
|
||||
//
|
||||
// We need to find a value k, such that Ax^2 + Bx + C = kR will have a
|
||||
// positive solution n (in the above sense), and also such that the n
|
||||
// will be the least among all solutions corresponding to k = 0, 1, ...
|
||||
// (more precisely, the least element in the set
|
||||
// { n(k) | k is such that a solution n(k) exists }).
|
||||
//
|
||||
// Consider the parabola (over real numbers) that corresponds to the
|
||||
// quadratic equation. Since A > 0, the arms of the parabola will point
|
||||
// up. Picking different values of k will shift it up and down by R.
|
||||
//
|
||||
// We want to shift the parabola in such a way as to reduce the problem
|
||||
// of solving q(x) = kR to solving shifted_q(x) = 0.
|
||||
// (The interesting solutions are the ceilings of the real number
|
||||
// solutions.)
|
||||
APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
|
||||
APInt TwoA = 2 * A;
|
||||
APInt SqrB = B * B;
|
||||
bool PickLow;
|
||||
|
||||
auto RoundUp = [] (const APInt &V, const APInt &A) {
|
||||
assert(A.isStrictlyPositive());
|
||||
APInt T = V.abs().urem(A);
|
||||
if (T.isNullValue())
|
||||
return V;
|
||||
return V.isNegative() ? V+T : V+(A-T);
|
||||
};
|
||||
|
||||
// The vertex of the parabola is at -B/2A, but since A > 0, it's negative
|
||||
// iff B is positive.
|
||||
if (B.isNonNegative()) {
|
||||
// If B >= 0, the vertex it at a negative location (or at 0), so in
|
||||
// order to have a non-negative solution we need to pick k that makes
|
||||
// C-kR negative. To satisfy all the requirements for the solution
|
||||
// that we are looking for, it needs to be closest to 0 of all k.
|
||||
C = C.srem(R);
|
||||
if (C.isStrictlyPositive())
|
||||
C -= R;
|
||||
// Pick the greater solution.
|
||||
PickLow = false;
|
||||
} else {
|
||||
// If B < 0, the vertex is at a positive location. For any solution
|
||||
// to exist, the discriminant must be non-negative. This means that
|
||||
// C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
|
||||
// lower bound on values of k: kR >= C - B^2/4A.
|
||||
APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
|
||||
// Round LowkR up (towards +inf) to the nearest kR.
|
||||
LowkR = RoundUp(LowkR, R);
|
||||
|
||||
// If there exists k meeting the condition above, and such that
|
||||
// C-kR > 0, there will be two positive real number solutions of
|
||||
// q(x) = kR. Out of all such values of k, pick the one that makes
|
||||
// C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
|
||||
// In other words, find maximum k such that LowkR <= kR < C.
|
||||
if (C.sgt(LowkR)) {
|
||||
// If LowkR < C, then such a k is guaranteed to exist because
|
||||
// LowkR itself is a multiple of R.
|
||||
C -= -RoundUp(-C, R); // C = C - RoundDown(C, R)
|
||||
// Pick the smaller solution.
|
||||
PickLow = true;
|
||||
} else {
|
||||
// If C-kR < 0 for all potential k's, it means that one solution
|
||||
// will be negative, while the other will be positive. The positive
|
||||
// solution will shift towards 0 if the parabola is moved up.
|
||||
// Pick the kR closest to the lower bound (i.e. make C-kR closest
|
||||
// to 0, or in other words, out of all parabolas that have solutions,
|
||||
// pick the one that is the farthest "up").
|
||||
// Since LowkR is itself a multiple of R, simply take C-LowkR.
|
||||
C -= LowkR;
|
||||
// Pick the greater solution.
|
||||
PickLow = false;
|
||||
}
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
|
||||
<< B << "x + " << C << ", rw:" << RangeWidth << '\n');
|
||||
|
||||
APInt D = SqrB - 4*A*C;
|
||||
assert(D.isNonNegative() && "Negative discriminant");
|
||||
APInt SQ = D.sqrt();
|
||||
|
||||
APInt Q = SQ * SQ;
|
||||
bool InexactSQ = Q != D;
|
||||
// The calculated SQ may actually be greater than the exact (non-integer)
|
||||
// value. If that's the case, decremement SQ to get a value that is lower.
|
||||
if (Q.sgt(D))
|
||||
SQ -= 1;
|
||||
|
||||
APInt X;
|
||||
APInt Rem;
|
||||
|
||||
// SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
|
||||
// When using the quadratic formula directly, the calculated low root
|
||||
// may be greater than the exact one, since we would be subtracting SQ.
|
||||
// To make sure that the calculated root is not greater than the exact
|
||||
// one, subtract SQ+1 when calculating the low root (for inexact value
|
||||
// of SQ).
|
||||
if (PickLow)
|
||||
APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
|
||||
else
|
||||
APInt::sdivrem(-B + SQ, TwoA, X, Rem);
|
||||
|
||||
// The updated coefficients should be such that the (exact) solution is
|
||||
// positive. Since APInt division rounds towards 0, the calculated one
|
||||
// can be 0, but cannot be negative.
|
||||
assert(X.isNonNegative() && "Solution should be non-negative");
|
||||
|
||||
if (!InexactSQ && Rem.isNullValue()) {
|
||||
LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
|
||||
return X;
|
||||
}
|
||||
|
||||
assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
|
||||
// The exact value of the square root of D should be between SQ and SQ+1.
|
||||
// This implies that the solution should be between that corresponding to
|
||||
// SQ (i.e. X) and that corresponding to SQ+1.
|
||||
//
|
||||
// The calculated X cannot be greater than the exact (real) solution.
|
||||
// Actually it must be strictly less than the exact solution, while
|
||||
// X+1 will be greater than or equal to it.
|
||||
|
||||
APInt VX = (A*X + B)*X + C;
|
||||
APInt VY = VX + TwoA*X + A + B;
|
||||
bool SignChange = VX.isNegative() != VY.isNegative() ||
|
||||
VX.isNullValue() != VY.isNullValue();
|
||||
// If the sign did not change between X and X+1, X is not a valid solution.
|
||||
// This could happen when the actual (exact) roots don't have an integer
|
||||
// between them, so they would both be contained between X and X+1.
|
||||
if (!SignChange) {
|
||||
LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
|
||||
return None;
|
||||
}
|
||||
|
||||
X += 1;
|
||||
LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
|
||||
return X;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
|
||||
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'f0':
|
||||
; CHECK-NEXT: Classifying expressions for: @f0
|
||||
; CHECK-NEXT: %v0 = phi i16 [ 2, %b0 ], [ %v2, %b1 ]
|
||||
; CHECK-NEXT: --> {2,+,1}<nuw><nsw><%b1> U: [2,4) S: [2,4) Exits: 3 LoopDispositions: { %b1: Computable }
|
||||
; CHECK-NEXT: %v1 = phi i16 [ 1, %b0 ], [ %v3, %b1 ]
|
||||
; CHECK-NEXT: --> {1,+,2,+,1}<%b1> U: full-set S: full-set Exits: 3 LoopDispositions: { %b1: Computable }
|
||||
; CHECK-NEXT: %v2 = add nsw i16 %v0, 1
|
||||
; CHECK-NEXT: --> {3,+,1}<nuw><nsw><%b1> U: [3,5) S: [3,5) Exits: 4 LoopDispositions: { %b1: Computable }
|
||||
; CHECK-NEXT: %v3 = add nsw i16 %v1, %v0
|
||||
; CHECK-NEXT: --> {3,+,3,+,1}<%b1> U: full-set S: full-set Exits: 6 LoopDispositions: { %b1: Computable }
|
||||
; CHECK-NEXT: %v4 = and i16 %v3, 1
|
||||
; CHECK-NEXT: --> (zext i1 {true,+,true,+,true}<%b1> to i16) U: [0,2) S: [0,2) Exits: 0 LoopDispositions: { %b1: Computable }
|
||||
; CHECK-NEXT: Determining loop execution counts for: @f0
|
||||
; CHECK-NEXT: Loop %b1: backedge-taken count is 1
|
||||
; CHECK-NEXT: Loop %b1: max backedge-taken count is 1
|
||||
; CHECK-NEXT: Loop %b1: Predicated backedge-taken count is 1
|
||||
; CHECK-NEXT: Predicates:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: Loop %b1: Trip multiple is 2
|
||||
define void @f0() {
|
||||
b0:
|
||||
br label %b1
|
||||
|
||||
b1: ; preds = %b1, %b0
|
||||
%v0 = phi i16 [ 2, %b0 ], [ %v2, %b1 ]
|
||||
%v1 = phi i16 [ 1, %b0 ], [ %v3, %b1 ]
|
||||
%v2 = add nsw i16 %v0, 1
|
||||
%v3 = add nsw i16 %v1, %v0
|
||||
%v4 = and i16 %v3, 1
|
||||
%v5 = icmp ne i16 %v4, 0
|
||||
br i1 %v5, label %b1, label %b2
|
||||
|
||||
b2: ; preds = %b1
|
||||
ret void
|
||||
}
|
||||
|
||||
@g0 = common dso_local global i16 0, align 2
|
||||
@g1 = common dso_local global i32 0, align 4
|
||||
@g2 = common dso_local global i32* null, align 8
|
||||
|
||||
; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'f1':
|
||||
; CHECK-NEXT: Classifying expressions for: @f1
|
||||
; CHECK-NEXT: %v0 = phi i16 [ 0, %b0 ], [ %v3, %b1 ]
|
||||
; CHECK-NEXT: --> {0,+,3,+,1}<%b1> U: full-set S: full-set Exits: 7 LoopDispositions: { %b1: Computable }
|
||||
; CHECK-NEXT: %v1 = phi i32 [ 3, %b0 ], [ %v6, %b1 ]
|
||||
; CHECK-NEXT: --> {3,+,1}<nuw><nsw><%b1> U: [3,6) S: [3,6) Exits: 5 LoopDispositions: { %b1: Computable }
|
||||
; CHECK-NEXT: %v2 = trunc i32 %v1 to i16
|
||||
; CHECK-NEXT: --> {3,+,1}<%b1> U: [3,6) S: [3,6) Exits: 5 LoopDispositions: { %b1: Computable }
|
||||
; CHECK-NEXT: %v3 = add i16 %v0, %v2
|
||||
; CHECK-NEXT: --> {3,+,4,+,1}<%b1> U: full-set S: full-set Exits: 12 LoopDispositions: { %b1: Computable }
|
||||
; CHECK-NEXT: %v4 = and i16 %v3, 1
|
||||
; CHECK-NEXT: --> (zext i1 {true,+,false,+,true}<%b1> to i16) U: [0,2) S: [0,2) Exits: 0 LoopDispositions: { %b1: Computable }
|
||||
; CHECK-NEXT: %v6 = add nuw nsw i32 %v1, 1
|
||||
; CHECK-NEXT: --> {4,+,1}<nuw><nsw><%b1> U: [4,7) S: [4,7) Exits: 6 LoopDispositions: { %b1: Computable }
|
||||
; CHECK-NEXT: %v7 = phi i32 [ %v1, %b1 ]
|
||||
; CHECK-NEXT: --> %v7 U: [3,6) S: [3,6)
|
||||
; CHECK-NEXT: %v8 = phi i16 [ %v3, %b1 ]
|
||||
; CHECK-NEXT: --> %v8 U: full-set S: full-set
|
||||
; CHECK-NEXT: Determining loop execution counts for: @f1
|
||||
; CHECK-NEXT: Loop %b3: <multiple exits> Unpredictable backedge-taken count.
|
||||
; CHECK-NEXT: Loop %b3: Unpredictable max backedge-taken count.
|
||||
; CHECK-NEXT: Loop %b3: Unpredictable predicated backedge-taken count.
|
||||
; CHECK-NEXT: Loop %b1: backedge-taken count is 2
|
||||
; CHECK-NEXT: Loop %b1: max backedge-taken count is 2
|
||||
; CHECK-NEXT: Loop %b1: Predicated backedge-taken count is 2
|
||||
; CHECK-NEXT: Predicates:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: Loop %b1: Trip multiple is 3
|
||||
define void @f1() #0 {
|
||||
b0:
|
||||
store i16 0, i16* @g0, align 2
|
||||
store i32* @g1, i32** @g2, align 8
|
||||
br label %b1
|
||||
|
||||
b1: ; preds = %b1, %b0
|
||||
%v0 = phi i16 [ 0, %b0 ], [ %v3, %b1 ]
|
||||
%v1 = phi i32 [ 3, %b0 ], [ %v6, %b1 ]
|
||||
%v2 = trunc i32 %v1 to i16
|
||||
%v3 = add i16 %v0, %v2
|
||||
%v4 = and i16 %v3, 1
|
||||
%v5 = icmp eq i16 %v4, 0
|
||||
%v6 = add nuw nsw i32 %v1, 1
|
||||
br i1 %v5, label %b2, label %b1
|
||||
|
||||
b2: ; preds = %b1
|
||||
%v7 = phi i32 [ %v1, %b1 ]
|
||||
%v8 = phi i16 [ %v3, %b1 ]
|
||||
store i32 %v7, i32* @g1, align 4
|
||||
store i16 %v8, i16* @g0, align 2
|
||||
br label %b3
|
||||
|
||||
b3: ; preds = %b3, %b2
|
||||
br label %b3
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind uwtable "target-cpu"="x86-64" }
|
|
@ -0,0 +1,51 @@
|
|||
; RUN: opt -analyze -scalar-evolution -S < %s | FileCheck %s
|
||||
|
||||
; The exit value from this loop was originally calculated as 0.
|
||||
; The actual exit condition is 256*256 == 0 (in i16).
|
||||
|
||||
; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'f0':
|
||||
; CHECK-NEXT: Classifying expressions for: @f0
|
||||
; CHECK-NEXT: %v1 = phi i16 [ 0, %b0 ], [ %v2, %b1 ]
|
||||
; CHECK-NEXT: --> {0,+,-1}<%b1> U: [-255,1) S: [-255,1) Exits: -255 LoopDispositions: { %b1: Computable }
|
||||
; CHECK-NEXT: %v2 = add i16 %v1, -1
|
||||
; CHECK-NEXT: --> {-1,+,-1}<%b1> U: [-256,0) S: [-256,0) Exits: -256 LoopDispositions: { %b1: Computable }
|
||||
; CHECK-NEXT: %v3 = mul i16 %v2, %v2
|
||||
; CHECK-NEXT: --> {1,+,3,+,2}<%b1> U: full-set S: full-set Exits: 0 LoopDispositions: { %b1: Computable }
|
||||
; CHECK-NEXT: %v5 = phi i16 [ %v2, %b1 ]
|
||||
; CHECK-NEXT: --> %v5 U: [-256,0) S: [-256,0)
|
||||
; CHECK-NEXT: %v6 = phi i16 [ %v3, %b1 ]
|
||||
; CHECK-NEXT: --> %v6 U: full-set S: full-set
|
||||
; CHECK-NEXT: %v7 = sext i16 %v5 to i32
|
||||
; CHECK-NEXT: --> (sext i16 %v5 to i32) U: [-256,0) S: [-256,0)
|
||||
; CHECK-NEXT: Determining loop execution counts for: @f0
|
||||
; CHECK-NEXT: Loop %b1: backedge-taken count is 255
|
||||
; CHECK-NEXT: Loop %b1: max backedge-taken count is 255
|
||||
; CHECK-NEXT: Loop %b1: Predicated backedge-taken count is 255
|
||||
; CHECK-NEXT: Predicates:
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: Loop %b1: Trip multiple is 256
|
||||
|
||||
|
||||
@g0 = global i32 0, align 4
|
||||
@g1 = global i16 0, align 2
|
||||
|
||||
define signext i32 @f0() {
|
||||
b0:
|
||||
br label %b1
|
||||
|
||||
b1: ; preds = %b1, %b0
|
||||
%v1 = phi i16 [ 0, %b0 ], [ %v2, %b1 ]
|
||||
%v2 = add i16 %v1, -1
|
||||
%v3 = mul i16 %v2, %v2
|
||||
%v4 = icmp eq i16 %v3, 0
|
||||
br i1 %v4, label %b2, label %b1
|
||||
|
||||
b2: ; preds = %b1
|
||||
%v5 = phi i16 [ %v2, %b1 ]
|
||||
%v6 = phi i16 [ %v3, %b1 ]
|
||||
%v7 = sext i16 %v5 to i32
|
||||
store i32 %v7, i32* @g0, align 4
|
||||
store i16 %v6, i16* @g1, align 2
|
||||
ret i32 0
|
||||
}
|
||||
|
|
@ -0,0 +1,364 @@
|
|||
; RUN: opt -analyze -scalar-evolution -S -debug-only=scalar-evolution,apint < %s 2>&1 | FileCheck %s
|
||||
; REQUIRES: asserts
|
||||
|
||||
; Use the following template to get a chrec {L,+,M,+,N}.
|
||||
;
|
||||
; define signext i32 @func() {
|
||||
; entry:
|
||||
; br label %loop
|
||||
;
|
||||
; loop:
|
||||
; %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ]
|
||||
; %inc = phi i32 [ X, %entry ], [ %inc1, %loop ]
|
||||
; %acc = phi i32 [ Y, %entry ], [ %acc1, %loop ]
|
||||
; %ivr1 = add i32 %ivr, %inc
|
||||
; %inc1 = add i32 %inc, Z ; M = inc1 = inc + N = X + N
|
||||
; %acc1 = add i32 %acc, %inc ; L = acc1 = X + Y
|
||||
; %and = and i32 %acc1, 2^W-1 ; iW
|
||||
; %cond = icmp eq i32 %and, 0
|
||||
; br i1 %cond, label %exit, label %loop
|
||||
;
|
||||
; exit:
|
||||
; %rv = phi i32 [ %acc1, %loop ]
|
||||
; ret i32 %rv
|
||||
; }
|
||||
;
|
||||
; From
|
||||
; X + Y = L
|
||||
; X + Z = M
|
||||
; Z = N
|
||||
; get
|
||||
; X = M - N
|
||||
; Y = N - M + L
|
||||
; Z = N
|
||||
|
||||
; The connection between the chrec coefficients {L,+,M,+,N} and the quadratic
|
||||
; coefficients is that the quadratic equation is N x^2 + (2M-N) x + 2L = 0,
|
||||
; where the equation was multiplied by 2 to make the coefficient at x^2 an
|
||||
; integer (the actual equation is N/2 x^2 + (M-N/2) x + L = 0).
|
||||
|
||||
; Quadratic equation: 2x^2 + 2x + 4 in i4, solution (wrap): 4
|
||||
; {14,+,14,+,14} -> X=0, Y=14, Z=14
|
||||
;
|
||||
; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test01'
|
||||
; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {-2,+,-2,+,-2}<%loop>
|
||||
; CHECK: GetQuadraticEquation: addrec coeff bw: 4
|
||||
; CHECK: GetQuadraticEquation: equation -2x^2 + -2x + -4, coeff bw: 5, multiplied by 2
|
||||
; CHECK: SolveQuadraticAddRecExact: solving for unsigned overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving -2x^2 + -2x + -4, rw:5
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 2x^2 + 2x + -28, rw:5
|
||||
; CHECK: SolveQuadraticEquationWrap: solution (wrap): 4
|
||||
; CHECK: Loop %loop: Unpredictable backedge-taken count
|
||||
define signext i32 @test01() {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ]
|
||||
%inc = phi i32 [ 0, %entry ], [ %inc1, %loop ]
|
||||
%acc = phi i32 [ 14, %entry ], [ %acc1, %loop ]
|
||||
%ivr1 = add i32 %ivr, %inc
|
||||
%inc1 = add i32 %inc, 14
|
||||
%acc1 = add i32 %acc, %inc
|
||||
%and = and i32 %acc1, 15
|
||||
%cond = icmp eq i32 %and, 0
|
||||
br i1 %cond, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
%rv = phi i32 [ %acc1, %loop ]
|
||||
ret i32 %rv
|
||||
}
|
||||
|
||||
; Quadratic equation: 1x^2 + -73x + -146 in i32, solution (wrap): 75
|
||||
; {-72,+,-36,+,1} -> X=-37, Y=-35, Z=1
|
||||
;
|
||||
; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test02':
|
||||
; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {0,+,-36,+,1}<%loop>
|
||||
; CHECK: GetQuadraticEquation: addrec coeff bw: 32
|
||||
; CHECK: GetQuadraticEquation: equation 1x^2 + -73x + 0, coeff bw: 33, multiplied by 2
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -73x + 4294967154, rw:32
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -73x + -142, rw:32
|
||||
; CHECK: SolveQuadraticEquationWrap: solution (wrap): 75
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -73x + 4294967154, rw:33
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -73x + -4294967438, rw:33
|
||||
; CHECK: SolveQuadraticEquationWrap: solution (wrap): 65573
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -73x + -146, rw:32
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -73x + -146, rw:32
|
||||
; CHECK: SolveQuadraticEquationWrap: solution (wrap): 75
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -73x + -146, rw:33
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -73x + -146, rw:33
|
||||
; CHECK: SolveQuadraticEquationWrap: solution (wrap): 75
|
||||
; CHECK: Loop %loop: backedge-taken count is 75
|
||||
define signext i32 @test02() {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ]
|
||||
%inc = phi i32 [ -37, %entry ], [ %inc1, %loop ]
|
||||
%acc = phi i32 [ -35, %entry ], [ %acc1, %loop ]
|
||||
%ivr1 = add i32 %ivr, %inc
|
||||
%inc1 = add i32 %inc, 1
|
||||
%acc1 = add i32 %acc, %inc
|
||||
%and = and i32 %acc1, -1
|
||||
%cond = icmp sgt i32 %and, 0
|
||||
br i1 %cond, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
%rv = phi i32 [ %acc1, %loop ]
|
||||
ret i32 %rv
|
||||
}
|
||||
|
||||
; Quadratic equation: 2x^2 - 4x + 34 in i4, solution (exact): 1.
|
||||
; {17,+,-1,+,2} -> X=-3, Y=20, Z=2
|
||||
;
|
||||
; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test03':
|
||||
; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {1,+,-1,+,2}<%loop>
|
||||
; CHECK: GetQuadraticEquation: addrec coeff bw: 4
|
||||
; CHECK: GetQuadraticEquation: equation 2x^2 + -4x + 2, coeff bw: 5, multiplied by 2
|
||||
; CHECK: SolveQuadraticAddRecExact: solving for unsigned overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving 2x^2 + -4x + 2, rw:5
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 2x^2 + -4x + 2, rw:5
|
||||
; CHECK: SolveQuadraticEquationWrap: solution (root): 1
|
||||
; CHECK: Loop %loop: backedge-taken count is 1
|
||||
define signext i32 @test03() {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ]
|
||||
%inc = phi i32 [ -3, %entry ], [ %inc1, %loop ]
|
||||
%acc = phi i32 [ 20, %entry ], [ %acc1, %loop ]
|
||||
%ivr1 = add i32 %ivr, %inc
|
||||
%inc1 = add i32 %inc, 2
|
||||
%acc1 = add i32 %acc, %inc
|
||||
%and = and i32 %acc1, 15
|
||||
%cond = icmp eq i32 %and, 0
|
||||
br i1 %cond, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
%rv = phi i32 [ %acc1, %loop ]
|
||||
ret i32 %rv
|
||||
}
|
||||
|
||||
; Quadratic equation 4x^2 + 2x + 2 in i16, solution (wrap): 181
|
||||
; {1,+,3,+,4} -> X=-1, Y=2, Z=4 (i16)
|
||||
;
|
||||
; This is an example where the returned solution is the first time an
|
||||
; unsigned wrap occurs, whereas the actual exit condition occurs much
|
||||
; later. The number of iterations returned by SolveQuadraticEquation
|
||||
; is 181, but the loop will iterate 37174 times.
|
||||
;
|
||||
; Here is a C code that corresponds to this case that calculates the number
|
||||
; of iterations:
|
||||
;
|
||||
; int test04() {
|
||||
; int c = 0;
|
||||
; int ivr = 0;
|
||||
; int inc = -1;
|
||||
; int acc = 2;
|
||||
;
|
||||
; while (acc & 0xffff) {
|
||||
; c++;
|
||||
; ivr += inc;
|
||||
; inc += 4;
|
||||
; acc += inc;
|
||||
; }
|
||||
;
|
||||
; return c;
|
||||
; }
|
||||
;
|
||||
|
||||
; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test04':
|
||||
; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {0,+,3,+,4}<%loop>
|
||||
; CHECK: GetQuadraticEquation: addrec coeff bw: 16
|
||||
; CHECK: GetQuadraticEquation: equation 4x^2 + 2x + 0, coeff bw: 17, multiplied by 2
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving 4x^2 + 2x + 2, rw:16
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 4x^2 + 2x + -65534, rw:16
|
||||
; CHECK: SolveQuadraticEquationWrap: solution (wrap): 128
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving 4x^2 + 2x + 2, rw:17
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 4x^2 + 2x + -131070, rw:17
|
||||
; CHECK: SolveQuadraticEquationWrap: solution (wrap): 181
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving 4x^2 + 2x + 2, rw:16
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 4x^2 + 2x + -65534, rw:16
|
||||
; CHECK: SolveQuadraticEquationWrap: solution (wrap): 128
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving 4x^2 + 2x + 2, rw:17
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 4x^2 + 2x + -131070, rw:17
|
||||
; CHECK: SolveQuadraticEquationWrap: solution (wrap): 181
|
||||
; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {1,+,3,+,4}<%loop>
|
||||
; CHECK: GetQuadraticEquation: addrec coeff bw: 16
|
||||
; CHECK: GetQuadraticEquation: equation 4x^2 + 2x + 2, coeff bw: 17, multiplied by 2
|
||||
; CHECK: SolveQuadraticAddRecExact: solving for unsigned overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving 4x^2 + 2x + 2, rw:17
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 4x^2 + 2x + -131070, rw:17
|
||||
; CHECK: SolveQuadraticEquationWrap: solution (wrap): 181
|
||||
; CHECK: Loop %loop: Unpredictable backedge-taken count.
|
||||
define signext i32 @test04() {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ]
|
||||
%inc = phi i32 [ -1, %entry ], [ %inc1, %loop ]
|
||||
%acc = phi i32 [ 2, %entry ], [ %acc1, %loop ]
|
||||
%ivr1 = add i32 %ivr, %inc
|
||||
%inc1 = add i32 %inc, 4
|
||||
%acc1 = add i32 %acc, %inc
|
||||
%and = trunc i32 %acc1 to i16
|
||||
%cond = icmp eq i16 %and, 0
|
||||
br i1 %cond, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
%rv = phi i32 [ %acc1, %loop ]
|
||||
ret i32 %rv
|
||||
}
|
||||
|
||||
; A case with signed arithmetic, but unsigned comparison.
|
||||
|
||||
; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test05':
|
||||
; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {0,+,-1,+,-1}<%loop>
|
||||
; CHECK: GetQuadraticEquation: addrec coeff bw: 32
|
||||
; CHECK: GetQuadraticEquation: equation -1x^2 + -1x + 0, coeff bw: 33, multiplied by 2
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving -1x^2 + -1x + 4, rw:32
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + 1x + -4, rw:32
|
||||
; CHECK: SolveQuadraticEquationWrap: solution (wrap): 2
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving -1x^2 + -1x + 4, rw:33
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + 1x + -4, rw:33
|
||||
; CHECK: SolveQuadraticEquationWrap: solution (wrap): 2
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving -1x^2 + -1x + -2, rw:32
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + 1x + -4294967294, rw:32
|
||||
; CHECK: SolveQuadraticEquationWrap: solution (wrap): 65536
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving -1x^2 + -1x + -2, rw:33
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + 1x + -8589934590, rw:33
|
||||
; CHECK: SolveQuadraticEquationWrap: solution (wrap): 92682
|
||||
; CHECK: Loop %loop: backedge-taken count is 2
|
||||
|
||||
define signext i32 @test05() {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ]
|
||||
%inc = phi i32 [ 0, %entry ], [ %inc1, %loop ]
|
||||
%acc = phi i32 [ -1, %entry ], [ %acc1, %loop ]
|
||||
%ivr1 = add i32 %ivr, %inc
|
||||
%inc1 = add i32 %inc, -1
|
||||
%acc1 = add i32 %acc, %inc
|
||||
%and = and i32 %acc1, -1
|
||||
%cond = icmp ule i32 %and, -3
|
||||
br i1 %cond, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
%rv = phi i32 [ %acc1, %loop ]
|
||||
ret i32 %rv
|
||||
}
|
||||
|
||||
; A test that used to crash with one of the earlier versions of the code.
|
||||
|
||||
; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test06':
|
||||
; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {0,+,-99999,+,1}<%loop>
|
||||
; CHECK: GetQuadraticEquation: addrec coeff bw: 32
|
||||
; CHECK: GetQuadraticEquation: equation 1x^2 + -199999x + 0, coeff bw: 33, multiplied by 2
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -199999x + -4294967294, rw:32
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -199999x + 2, rw:32
|
||||
; CHECK: SolveQuadraticEquationWrap: solution (wrap): 1
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -199999x + -4294967294, rw:33
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -199999x + 4294967298, rw:33
|
||||
; CHECK: SolveQuadraticEquationWrap: solution (wrap): 24469
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -199999x + -12, rw:32
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -199999x + 4294967284, rw:32
|
||||
; CHECK: SolveQuadraticEquationWrap: solution (wrap): 24469
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -199999x + -12, rw:33
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -199999x + 8589934580, rw:33
|
||||
; CHECK: SolveQuadraticEquationWrap: solution (wrap): 62450
|
||||
; CHECK: Loop %loop: backedge-taken count is 24469
|
||||
define signext i32 @test06() {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ]
|
||||
%inc = phi i32 [ -100000, %entry ], [ %inc1, %loop ]
|
||||
%acc = phi i32 [ 100000, %entry ], [ %acc1, %loop ]
|
||||
%ivr1 = add i32 %ivr, %inc
|
||||
%inc1 = add i32 %inc, 1
|
||||
%acc1 = add i32 %acc, %inc
|
||||
%and = and i32 %acc1, -1
|
||||
%cond = icmp sgt i32 %and, 5
|
||||
br i1 %cond, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
%rv = phi i32 [ %acc1, %loop ]
|
||||
ret i32 %rv
|
||||
}
|
||||
|
||||
; The equation
|
||||
; 532052752x^2 + -450429774x + 71188414 = 0
|
||||
; has two exact solutions (up to two decimal digits): 0.21 and 0.64.
|
||||
; Since there is no integer between them, there is no integer n that either
|
||||
; solves the equation exactly, or changes the sign of it between n and n+1.
|
||||
|
||||
; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test07':
|
||||
; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {0,+,40811489,+,532052752}<%loop>
|
||||
; CHECK: GetQuadraticEquation: addrec coeff bw: 32
|
||||
; CHECK: GetQuadraticEquation: equation 532052752x^2 + -450429774x + 0, coeff bw: 33, multiplied by 2
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving 532052752x^2 + -450429774x + 71188414, rw:32
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 532052752x^2 + -450429774x + 71188414, rw:32
|
||||
; CHECK: SolveQuadraticEquationWrap: no valid solution
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving 532052752x^2 + -450429774x + 71188414, rw:33
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 532052752x^2 + -450429774x + 71188414, rw:33
|
||||
; CHECK: SolveQuadraticEquationWrap: no valid solution
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for signed overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving 532052752x^2 + -450429774x + 71188414, rw:32
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 532052752x^2 + -450429774x + 71188414, rw:32
|
||||
; CHECK: SolveQuadraticEquationWrap: no valid solution
|
||||
; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving 532052752x^2 + -450429774x + 71188414, rw:33
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 532052752x^2 + -450429774x + 71188414, rw:33
|
||||
; CHECK: SolveQuadraticEquationWrap: no valid solution
|
||||
; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {35594207,+,40811489,+,532052752}<%loop>
|
||||
; CHECK: GetQuadraticEquation: addrec coeff bw: 32
|
||||
; CHECK: GetQuadraticEquation: equation 532052752x^2 + -450429774x + 71188414, coeff bw: 33, multiplied by 2
|
||||
; CHECK: SolveQuadraticAddRecExact: solving for unsigned overflow
|
||||
; CHECK: SolveQuadraticEquationWrap: solving 532052752x^2 + -450429774x + 71188414, rw:33
|
||||
; CHECK: SolveQuadraticEquationWrap: updated coefficients 532052752x^2 + -450429774x + 71188414, rw:33
|
||||
; CHECK: SolveQuadraticEquationWrap: no valid solution
|
||||
; CHECK: Loop %loop: Unpredictable backedge-taken count.
|
||||
define signext i32 @test07() {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ]
|
||||
%inc = phi i32 [ -491241263, %entry ], [ %inc1, %loop ]
|
||||
%acc = phi i32 [ 526835470, %entry ], [ %acc1, %loop ]
|
||||
%ivr1 = add i32 %ivr, %inc
|
||||
%inc1 = add i32 %inc, 532052752
|
||||
%acc1 = add i32 %acc, %inc
|
||||
%and = and i32 %acc1, -1
|
||||
%cond = icmp eq i32 %and, 0
|
||||
br i1 %cond, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
%rv = phi i32 [ %acc1, %loop ]
|
||||
ret i32 %rv
|
||||
}
|
||||
|
|
@ -10,6 +10,7 @@
|
|||
#include "llvm/ADT/APInt.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include <array>
|
||||
|
||||
|
@ -2357,4 +2358,89 @@ TEST(APIntTest, RoundingSDiv) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST(APIntTest, SolveQuadraticEquationWrap) {
|
||||
// Verify that "Solution" is the first non-negative integer that solves
|
||||
// Ax^2 + Bx + C = "0 or overflow", i.e. that it is a correct solution
|
||||
// as calculated by SolveQuadraticEquationWrap.
|
||||
auto Validate = [] (int A, int B, int C, unsigned Width, int Solution) {
|
||||
int Mask = (1 << Width) - 1;
|
||||
|
||||
// Solution should be non-negative.
|
||||
EXPECT_GE(Solution, 0);
|
||||
|
||||
auto OverflowBits = [] (int64_t V, unsigned W) {
|
||||
return V & -(1 << W);
|
||||
};
|
||||
|
||||
int64_t Over0 = OverflowBits(C, Width);
|
||||
|
||||
auto IsZeroOrOverflow = [&] (int X) {
|
||||
int64_t ValueAtX = A*X*X + B*X + C;
|
||||
int64_t OverX = OverflowBits(ValueAtX, Width);
|
||||
return (ValueAtX & Mask) == 0 || OverX != Over0;
|
||||
};
|
||||
|
||||
auto EquationToString = [&] (const char *X_str) {
|
||||
return Twine(A) + Twine(X_str) + Twine("^2 + ") + Twine(B) +
|
||||
Twine(X_str) + Twine(" + ") + Twine(C) + Twine(", bitwidth: ") +
|
||||
Twine(Width);
|
||||
};
|
||||
|
||||
auto IsSolution = [&] (const char *X_str, int X) {
|
||||
if (IsZeroOrOverflow(X))
|
||||
return ::testing::AssertionSuccess()
|
||||
<< X << " is a solution of " << EquationToString(X_str);
|
||||
return ::testing::AssertionFailure()
|
||||
<< X << " is not an expected solution of "
|
||||
<< EquationToString(X_str);
|
||||
};
|
||||
|
||||
auto IsNotSolution = [&] (const char *X_str, int X) {
|
||||
if (!IsZeroOrOverflow(X))
|
||||
return ::testing::AssertionSuccess()
|
||||
<< X << " is not a solution of " << EquationToString(X_str);
|
||||
return ::testing::AssertionFailure()
|
||||
<< X << " is an unexpected solution of "
|
||||
<< EquationToString(X_str);
|
||||
};
|
||||
|
||||
// This is the important part: make sure that there is no solution that
|
||||
// is less than the calculated one.
|
||||
if (Solution > 0) {
|
||||
for (int X = 1; X < Solution-1; ++X)
|
||||
EXPECT_PRED_FORMAT1(IsNotSolution, X);
|
||||
}
|
||||
|
||||
// Verify that the calculated solution is indeed a solution.
|
||||
EXPECT_PRED_FORMAT1(IsSolution, Solution);
|
||||
};
|
||||
|
||||
// Generate all possible quadratic equations with Width-bit wide integer
|
||||
// coefficients, get the solution from SolveQuadraticEquationWrap, and
|
||||
// verify that the solution is correct.
|
||||
auto Iterate = [&] (unsigned Width) {
|
||||
assert(1 < Width && Width < 32);
|
||||
int Low = -(1 << (Width-1));
|
||||
int High = (1 << (Width-1));
|
||||
|
||||
for (int A = Low; A != High; ++A) {
|
||||
if (A == 0)
|
||||
continue;
|
||||
for (int B = Low; B != High; ++B) {
|
||||
for (int C = Low; C != High; ++C) {
|
||||
Optional<APInt> S = APIntOps::SolveQuadraticEquationWrap(
|
||||
APInt(Width, A), APInt(Width, B),
|
||||
APInt(Width, C), Width);
|
||||
if (S.hasValue())
|
||||
Validate(A, B, C, Width, S->getSExtValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Test all widths in [2..6].
|
||||
for (unsigned i = 2; i <= 6; ++i)
|
||||
Iterate(i);
|
||||
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
|
Loading…
Reference in New Issue