forked from OSchip/llvm-project
[LoopAccessAnalysis] Fix an integer overflow
We were inappropriately using 32-bit types to account for quantities that can be far larger. Fixed in PR28443. llvm-svn: 274737
This commit is contained in:
parent
d5d2a35013
commit
7afb46d3c8
|
@ -228,7 +228,7 @@ public:
|
|||
|
||||
/// \brief The maximum number of bytes of a vector register we can vectorize
|
||||
/// the accesses safely with.
|
||||
unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
|
||||
uint64_t getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
|
||||
|
||||
/// \brief In same cases when the dependency check fails we can still
|
||||
/// vectorize the loop with a dynamic array access check.
|
||||
|
@ -284,7 +284,7 @@ private:
|
|||
unsigned AccessIdx;
|
||||
|
||||
// We can access this many bytes in parallel safely.
|
||||
unsigned MaxSafeDepDistBytes;
|
||||
uint64_t MaxSafeDepDistBytes;
|
||||
|
||||
/// \brief If we see a non-constant dependence distance we can still try to
|
||||
/// vectorize this loop with runtime checks.
|
||||
|
@ -324,7 +324,7 @@ private:
|
|||
///
|
||||
/// \return false if we shouldn't vectorize at all or avoid larger
|
||||
/// vectorization factors by limiting MaxSafeDepDistBytes.
|
||||
bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize);
|
||||
bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
|
||||
};
|
||||
|
||||
/// \brief Holds information about the memory runtime legality checks to verify
|
||||
|
@ -575,7 +575,7 @@ public:
|
|||
/// Returns true if the value V is uniform within the loop.
|
||||
bool isUniform(Value *V) const;
|
||||
|
||||
unsigned getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; }
|
||||
uint64_t getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; }
|
||||
unsigned getNumStores() const { return NumStores; }
|
||||
unsigned getNumLoads() const { return NumLoads;}
|
||||
|
||||
|
@ -672,7 +672,7 @@ private:
|
|||
unsigned NumLoads;
|
||||
unsigned NumStores;
|
||||
|
||||
unsigned MaxSafeDepDistBytes;
|
||||
uint64_t MaxSafeDepDistBytes;
|
||||
|
||||
/// \brief Cache the result of analyzeLoop.
|
||||
bool CanVecMem;
|
||||
|
@ -719,9 +719,9 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
|
|||
/// to \p PtrToStride and therefore add further predicates to \p PSE.
|
||||
/// The \p Assume parameter indicates if we are allowed to make additional
|
||||
/// run-time assumptions.
|
||||
int getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
|
||||
const ValueToValueMap &StridesMap = ValueToValueMap(),
|
||||
bool Assume = false);
|
||||
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
|
||||
const ValueToValueMap &StridesMap = ValueToValueMap(),
|
||||
bool Assume = false);
|
||||
|
||||
/// \brief Returns true if the memory operations \p A and \p B are consecutive.
|
||||
/// This is a simple API that does not depend on the analysis pass.
|
||||
|
|
|
@ -575,7 +575,7 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE,
|
|||
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
|
||||
return true;
|
||||
|
||||
int Stride = getPtrStride(PSE, Ptr, L, Strides);
|
||||
int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
|
||||
return Stride == 1;
|
||||
}
|
||||
|
||||
|
@ -866,9 +866,9 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
|
|||
}
|
||||
|
||||
/// \brief Check whether the access through \p Ptr has a constant stride.
|
||||
int llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
|
||||
const Loop *Lp, const ValueToValueMap &StridesMap,
|
||||
bool Assume) {
|
||||
int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
|
||||
const Loop *Lp, const ValueToValueMap &StridesMap,
|
||||
bool Assume) {
|
||||
Type *Ty = Ptr->getType();
|
||||
assert(Ty->isPointerTy() && "Unexpected non-ptr");
|
||||
|
||||
|
@ -1097,8 +1097,8 @@ bool MemoryDepChecker::Dependence::isForward() const {
|
|||
llvm_unreachable("unexpected DepType!");
|
||||
}
|
||||
|
||||
bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
|
||||
unsigned TypeByteSize) {
|
||||
bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
|
||||
uint64_t TypeByteSize) {
|
||||
// If loads occur at a distance that is not a multiple of a feasible vector
|
||||
// factor store-load forwarding does not take place.
|
||||
// Positive dependences might cause troubles because vectorizing them might
|
||||
|
@ -1111,13 +1111,13 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
|
|||
|
||||
// After this many iterations store-to-load forwarding conflicts should not
|
||||
// cause any slowdowns.
|
||||
const unsigned NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
|
||||
const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
|
||||
// Maximum vector factor.
|
||||
unsigned MaxVFWithoutSLForwardIssues = std::min(
|
||||
uint64_t MaxVFWithoutSLForwardIssues = std::min(
|
||||
VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
|
||||
|
||||
// Compute the smallest VF at which the store and load would be misaligned.
|
||||
for (unsigned VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
|
||||
for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
|
||||
VF *= 2) {
|
||||
// If the number of vector iteration between the store and the load are
|
||||
// small we could incur conflicts.
|
||||
|
@ -1145,8 +1145,8 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
|
|||
/// bytes.
|
||||
///
|
||||
/// \returns true if they are independent.
|
||||
static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride,
|
||||
unsigned TypeByteSize) {
|
||||
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
|
||||
uint64_t TypeByteSize) {
|
||||
assert(Stride > 1 && "The stride must be greater than 1");
|
||||
assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
|
||||
assert(Distance > 0 && "The distance must be non-zero");
|
||||
|
@ -1155,7 +1155,7 @@ static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride,
|
|||
if (Distance % TypeByteSize)
|
||||
return false;
|
||||
|
||||
unsigned ScaledDist = Distance / TypeByteSize;
|
||||
uint64_t ScaledDist = Distance / TypeByteSize;
|
||||
|
||||
// No dependence if the scaled distance is not multiple of the stride.
|
||||
// E.g.
|
||||
|
@ -1196,8 +1196,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
|
|||
BPtr->getType()->getPointerAddressSpace())
|
||||
return Dependence::Unknown;
|
||||
|
||||
int StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
|
||||
int StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
|
||||
int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
|
||||
int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
|
||||
|
||||
const SCEV *Src = PSE.getSCEV(APtr);
|
||||
const SCEV *Sink = PSE.getSCEV(BPtr);
|
||||
|
@ -1237,11 +1237,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
|
|||
Type *ATy = APtr->getType()->getPointerElementType();
|
||||
Type *BTy = BPtr->getType()->getPointerElementType();
|
||||
auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
|
||||
unsigned TypeByteSize = DL.getTypeAllocSize(ATy);
|
||||
uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
|
||||
|
||||
const APInt &Val = C->getAPInt();
|
||||
int64_t Distance = Val.getSExtValue();
|
||||
unsigned Stride = std::abs(StrideAPtr);
|
||||
uint64_t Stride = std::abs(StrideAPtr);
|
||||
|
||||
// Attempt to prove strided accesses independent.
|
||||
if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
|
||||
|
@ -1315,9 +1315,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
|
|||
// If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
|
||||
// the minimum distance needed is 28, which is greater than distance. It is
|
||||
// not safe to do vectorization.
|
||||
unsigned MinDistanceNeeded =
|
||||
uint64_t MinDistanceNeeded =
|
||||
TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
|
||||
if (MinDistanceNeeded > Distance) {
|
||||
if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
|
||||
DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance
|
||||
<< '\n');
|
||||
return Dependence::Backward;
|
||||
|
@ -1347,7 +1347,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
|
|||
// is 8, which is less than 2 and forbidden vectorization, But actually
|
||||
// both A and B could be vectorized by 2 iterations.
|
||||
MaxSafeDepDistBytes =
|
||||
Distance < MaxSafeDepDistBytes ? Distance : MaxSafeDepDistBytes;
|
||||
std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
|
||||
|
||||
bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
|
||||
if (IsTrueDataDependence && EnableForwardingConflictDetection &&
|
||||
|
@ -1365,7 +1365,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
|
|||
MemAccessInfoSet &CheckDeps,
|
||||
const ValueToValueMap &Strides) {
|
||||
|
||||
MaxSafeDepDistBytes = -1U;
|
||||
MaxSafeDepDistBytes = -1;
|
||||
while (!CheckDeps.empty()) {
|
||||
MemAccessInfo CurAccess = *CheckDeps.begin();
|
||||
|
||||
|
@ -1926,7 +1926,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
|
|||
PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
|
||||
DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
|
||||
DL(&DL), TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
|
||||
MaxSafeDepDistBytes(-1U), CanVecMem(false),
|
||||
MaxSafeDepDistBytes(-1), CanVecMem(false),
|
||||
StoreToLoopInvariantAddress(false) {
|
||||
if (canAnalyzeLoop())
|
||||
analyzeLoop();
|
||||
|
@ -1935,7 +1935,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
|
|||
void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
|
||||
if (CanVecMem) {
|
||||
OS.indent(Depth) << "Memory dependences are safe";
|
||||
if (MaxSafeDepDistBytes != -1U)
|
||||
if (MaxSafeDepDistBytes != -1ULL)
|
||||
OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
|
||||
<< " bytes";
|
||||
if (PtrRtChecking->Need)
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
; RUN: opt -basicaa -loop-distribute -verify-loop-info -verify-dom-info -S \
|
||||
; RUN: < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @fn1(i64 %a, i64* %b) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%add75.epil = phi i64 [ %add7.epil, %for.body ], [ %a, %entry ]
|
||||
%add1.epil = add nsw i64 %add75.epil, 268435457
|
||||
%arrayidx.epil = getelementptr inbounds i64, i64* %b, i64 %add1.epil
|
||||
%load = load i64, i64* %arrayidx.epil, align 8
|
||||
%add5.epil = add nsw i64 %add75.epil, 805306369
|
||||
%arrayidx6.epil = getelementptr inbounds i64, i64* %b, i64 %add5.epil
|
||||
store i64 %load, i64* %arrayidx6.epil, align 8
|
||||
%add7.epil = add nsw i64 %add75.epil, 2
|
||||
%epil.iter.cmp = icmp eq i64 %add7.epil, 0
|
||||
br i1 %epil.iter.cmp, label %for.end, label %for.body
|
||||
|
||||
; CHECK: %[[phi:.*]] = phi i64
|
||||
; CHECK: %[[add1:.*]] = add nsw i64 %[[phi]], 268435457
|
||||
; CHECK: %[[gep1:.*]] = getelementptr inbounds i64, i64* %b, i64 %[[add1]]
|
||||
; CHECK: %[[load:.*]] = load i64, i64* %[[gep1]], align 8
|
||||
; CHECK: %[[add2:.*]] = add nsw i64 %[[phi]], 805306369
|
||||
; CHECK: %[[gep2:.*]] = getelementptr inbounds i64, i64* %b, i64 %[[add2]]
|
||||
; CHECK: store i64 %[[load]], i64* %[[gep2]], align 8
|
||||
; CHECK: %[[incr:.*]] = add nsw i64 %[[phi]], 2
|
||||
; CHECK: %[[cmp:.*]] = icmp eq i64 %[[incr]], 0
|
||||
; CHECK: br i1 %[[cmp]]
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue