forked from OSchip/llvm-project
[LAA] Fix estimation of number of memchecks
Summary: We need to add a runtime memcheck for each pair of accesses (x, y) where at least one of x and y is a write. Assuming we have w writes and r reads, currently this number is estimated as being w * (w + r - 1). This estimation counts (write, write) pairs twice and therefore overestimates the number of checks required (e.g. with 3 writes and 3 reads it gives 15, while only 12 distinct pairs need a check). This change adds a getNumberOfChecks method to RuntimePointerCheck, which counts the number of runtime checks needed (similar in implementation to needsAnyChecking), and uses it to produce the correct number of runtime checks. Test Plan: llvm test suite spec2k spec2k6 Performance results: no changes observed (not surprising, since the formula for 1 writer is basically the same, which covers most cases - at least with the current check limit). Reviewers: anemet Reviewed By: anemet Subscribers: mzolotukhin, llvm-commits Differential Revision: http://reviews.llvm.org/D10217 llvm-svn: 239295
This commit is contained in:
parent
6aab1117e8
commit
98a137196a
|
@ -345,6 +345,10 @@ public:
|
||||||
/// to needsChecking.
|
/// to needsChecking.
|
||||||
bool needsAnyChecking(const SmallVectorImpl<int> *PtrPartition) const;
|
bool needsAnyChecking(const SmallVectorImpl<int> *PtrPartition) const;
|
||||||
|
|
||||||
|
/// \brief Returns the number of run-time checks required according to
|
||||||
|
/// needsChecking.
|
||||||
|
unsigned getNumberOfChecks(const SmallVectorImpl<int> *PtrPartition) const;
|
||||||
|
|
||||||
/// \brief Print the list of run-time memory checks necessary.
|
/// \brief Print the list of run-time memory checks necessary.
|
||||||
///
|
///
|
||||||
/// If \p PtrPartition is set, it contains the partition number for
|
/// If \p PtrPartition is set, it contains the partition number for
|
||||||
|
@ -385,7 +389,10 @@ public:
|
||||||
|
|
||||||
/// \brief Number of memchecks required to prove independence of otherwise
|
/// \brief Number of memchecks required to prove independence of otherwise
|
||||||
/// may-alias pointers.
|
/// may-alias pointers.
|
||||||
unsigned getNumRuntimePointerChecks() const { return NumComparisons; }
|
unsigned getNumRuntimePointerChecks(
|
||||||
|
const SmallVectorImpl<int> *PtrPartition = nullptr) const {
|
||||||
|
return PtrRtCheck.getNumberOfChecks(PtrPartition);
|
||||||
|
}
|
||||||
|
|
||||||
/// Return true if the block BB needs to be predicated in order for the loop
|
/// Return true if the block BB needs to be predicated in order for the loop
|
||||||
/// to be vectorized.
|
/// to be vectorized.
|
||||||
|
@ -460,10 +467,6 @@ private:
|
||||||
/// loop-independent and loop-carried dependences between memory accesses.
|
/// loop-independent and loop-carried dependences between memory accesses.
|
||||||
MemoryDepChecker DepChecker;
|
MemoryDepChecker DepChecker;
|
||||||
|
|
||||||
/// \brief Number of memchecks required to prove independence of otherwise
|
|
||||||
/// may-alias pointers
|
|
||||||
unsigned NumComparisons;
|
|
||||||
|
|
||||||
Loop *TheLoop;
|
Loop *TheLoop;
|
||||||
ScalarEvolution *SE;
|
ScalarEvolution *SE;
|
||||||
const DataLayout &DL;
|
const DataLayout &DL;
|
||||||
|
|
|
@ -177,15 +177,21 @@ void LoopAccessInfo::RuntimePointerCheck::print(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking(
|
unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks(
|
||||||
const SmallVectorImpl<int> *PtrPartition) const {
|
const SmallVectorImpl<int> *PtrPartition) const {
|
||||||
unsigned NumPointers = Pointers.size();
|
unsigned NumPointers = Pointers.size();
|
||||||
|
unsigned CheckCount = 0;
|
||||||
|
|
||||||
for (unsigned I = 0; I < NumPointers; ++I)
|
for (unsigned I = 0; I < NumPointers; ++I)
|
||||||
for (unsigned J = I + 1; J < NumPointers; ++J)
|
for (unsigned J = I + 1; J < NumPointers; ++J)
|
||||||
if (needsChecking(I, J, PtrPartition))
|
if (needsChecking(I, J, PtrPartition))
|
||||||
return true;
|
CheckCount++;
|
||||||
return false;
|
return CheckCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking(
|
||||||
|
const SmallVectorImpl<int> *PtrPartition) const {
|
||||||
|
return getNumberOfChecks(PtrPartition) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -220,10 +226,11 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \brief Check whether we can check the pointers at runtime for
|
/// \brief Check whether we can check the pointers at runtime for
|
||||||
/// non-intersection.
|
/// non-intersection. Returns true when we have 0 pointers
|
||||||
|
/// (a check on 0 pointers for non-intersection will always return true).
|
||||||
bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck,
|
bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck,
|
||||||
unsigned &NumComparisons, ScalarEvolution *SE,
|
bool &NeedRTCheck, ScalarEvolution *SE, Loop *TheLoop,
|
||||||
Loop *TheLoop, const ValueToValueMap &Strides,
|
const ValueToValueMap &Strides,
|
||||||
bool ShouldCheckStride = false);
|
bool ShouldCheckStride = false);
|
||||||
|
|
||||||
/// \brief Goes over all memory accesses, checks whether a RT check is needed
|
/// \brief Goes over all memory accesses, checks whether a RT check is needed
|
||||||
|
@ -290,23 +297,22 @@ static bool hasComputableBounds(ScalarEvolution *SE,
|
||||||
}
|
}
|
||||||
|
|
||||||
bool AccessAnalysis::canCheckPtrAtRT(
|
bool AccessAnalysis::canCheckPtrAtRT(
|
||||||
LoopAccessInfo::RuntimePointerCheck &RtCheck, unsigned &NumComparisons,
|
LoopAccessInfo::RuntimePointerCheck &RtCheck, bool &NeedRTCheck,
|
||||||
ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap,
|
ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap,
|
||||||
bool ShouldCheckStride) {
|
bool ShouldCheckStride) {
|
||||||
// Find pointers with computable bounds. We are going to use this information
|
// Find pointers with computable bounds. We are going to use this information
|
||||||
// to place a runtime bound check.
|
// to place a runtime bound check.
|
||||||
bool CanDoRT = true;
|
bool CanDoRT = true;
|
||||||
|
|
||||||
|
NeedRTCheck = false;
|
||||||
|
if (!IsRTCheckNeeded) return true;
|
||||||
|
|
||||||
bool IsDepCheckNeeded = isDependencyCheckNeeded();
|
bool IsDepCheckNeeded = isDependencyCheckNeeded();
|
||||||
NumComparisons = 0;
|
|
||||||
|
|
||||||
// We assign a consecutive id to access from different alias sets.
|
// We assign a consecutive id to access from different alias sets.
|
||||||
// Accesses between different groups don't need to be checked.
|
// Accesses between different groups don't need to be checked.
|
||||||
unsigned ASId = 1;
|
unsigned ASId = 1;
|
||||||
for (auto &AS : AST) {
|
for (auto &AS : AST) {
|
||||||
unsigned NumReadPtrChecks = 0;
|
|
||||||
unsigned NumWritePtrChecks = 0;
|
|
||||||
|
|
||||||
// We assign consecutive id to access from different dependence sets.
|
// We assign consecutive id to access from different dependence sets.
|
||||||
// Accesses within the same set don't need a runtime check.
|
// Accesses within the same set don't need a runtime check.
|
||||||
unsigned RunningDepId = 1;
|
unsigned RunningDepId = 1;
|
||||||
|
@ -317,11 +323,6 @@ bool AccessAnalysis::canCheckPtrAtRT(
|
||||||
bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
|
bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
|
||||||
MemAccessInfo Access(Ptr, IsWrite);
|
MemAccessInfo Access(Ptr, IsWrite);
|
||||||
|
|
||||||
if (IsWrite)
|
|
||||||
++NumWritePtrChecks;
|
|
||||||
else
|
|
||||||
++NumReadPtrChecks;
|
|
||||||
|
|
||||||
if (hasComputableBounds(SE, StridesMap, Ptr) &&
|
if (hasComputableBounds(SE, StridesMap, Ptr) &&
|
||||||
// When we run after a failing dependency check we have to make sure
|
// When we run after a failing dependency check we have to make sure
|
||||||
// we don't have wrapping pointers.
|
// we don't have wrapping pointers.
|
||||||
|
@ -349,16 +350,15 @@ bool AccessAnalysis::canCheckPtrAtRT(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
|
|
||||||
NumComparisons += 0; // Only one dependence set.
|
|
||||||
else {
|
|
||||||
NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks +
|
|
||||||
NumWritePtrChecks - 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
++ASId;
|
++ASId;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We need a runtime check if there are any accesses that need checking.
|
||||||
|
// However, some accesses cannot be checked (for example because we
|
||||||
|
// can't determine their bounds). In these cases we would need a check
|
||||||
|
// but wouldn't be able to add it.
|
||||||
|
NeedRTCheck = !CanDoRT || RtCheck.needsAnyChecking(nullptr);
|
||||||
|
|
||||||
// If the pointers that we would use for the bounds comparison have different
|
// If the pointers that we would use for the bounds comparison have different
|
||||||
// address spaces, assume the values aren't directly comparable, so we can't
|
// address spaces, assume the values aren't directly comparable, so we can't
|
||||||
// use them for the runtime check. We also have to assume they could
|
// use them for the runtime check. We also have to assume they could
|
||||||
|
@ -1207,22 +1207,17 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
|
||||||
// Build dependence sets and check whether we need a runtime pointer bounds
|
// Build dependence sets and check whether we need a runtime pointer bounds
|
||||||
// check.
|
// check.
|
||||||
Accesses.buildDependenceSets();
|
Accesses.buildDependenceSets();
|
||||||
bool NeedRTCheck = Accesses.isRTCheckNeeded();
|
|
||||||
|
|
||||||
// Find pointers with computable bounds. We are going to use this information
|
// Find pointers with computable bounds. We are going to use this information
|
||||||
// to place a runtime bound check.
|
// to place a runtime bound check.
|
||||||
bool CanDoRT = false;
|
bool NeedRTCheck;
|
||||||
if (NeedRTCheck)
|
bool CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck,
|
||||||
CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop,
|
NeedRTCheck, SE,
|
||||||
Strides);
|
TheLoop, Strides);
|
||||||
|
|
||||||
DEBUG(dbgs() << "LAA: We need to do " << NumComparisons <<
|
DEBUG(dbgs() << "LAA: We need to do "
|
||||||
" pointer comparisons.\n");
|
<< PtrRtCheck.getNumberOfChecks(nullptr)
|
||||||
|
<< " pointer comparisons.\n");
|
||||||
// If we only have one set of dependences to check pointers among we don't
|
|
||||||
// need a runtime check.
|
|
||||||
if (NumComparisons == 0 && NeedRTCheck)
|
|
||||||
NeedRTCheck = false;
|
|
||||||
|
|
||||||
// Check that we found the bounds for the pointer.
|
// Check that we found the bounds for the pointer.
|
||||||
if (CanDoRT)
|
if (CanDoRT)
|
||||||
|
@ -1255,10 +1250,11 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
|
||||||
PtrRtCheck.reset();
|
PtrRtCheck.reset();
|
||||||
PtrRtCheck.Need = true;
|
PtrRtCheck.Need = true;
|
||||||
|
|
||||||
CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
|
CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NeedRTCheck, SE,
|
||||||
TheLoop, Strides, true);
|
TheLoop, Strides, true);
|
||||||
|
|
||||||
// Check that we found the bounds for the pointer.
|
// Check that we found the bounds for the pointer.
|
||||||
if (!CanDoRT && NumComparisons > 0) {
|
if (NeedRTCheck && !CanDoRT) {
|
||||||
emitAnalysis(LoopAccessReport()
|
emitAnalysis(LoopAccessReport()
|
||||||
<< "cannot check memory dependencies at runtime");
|
<< "cannot check memory dependencies at runtime");
|
||||||
DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
|
DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
|
||||||
|
@ -1403,7 +1399,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
|
||||||
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
|
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
|
||||||
DominatorTree *DT, LoopInfo *LI,
|
DominatorTree *DT, LoopInfo *LI,
|
||||||
const ValueToValueMap &Strides)
|
const ValueToValueMap &Strides)
|
||||||
: DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL),
|
: DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
|
||||||
TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
|
TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
|
||||||
MaxSafeDepDistBytes(-1U), CanVecMem(false),
|
MaxSafeDepDistBytes(-1U), CanVecMem(false),
|
||||||
StoreToLoopInvariantAddress(false) {
|
StoreToLoopInvariantAddress(false) {
|
||||||
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
|
||||||
|
|
||||||
|
; 3 reads and 3 writes should need 12 memchecks
|
||||||
|
|
||||||
|
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
||||||
|
target triple = "aarch64--linux-gnueabi"
|
||||||
|
|
||||||
|
; CHECK: Memory dependences are safe with run-time checks
|
||||||
|
; Memory dependences have labels starting from 0, so in
|
||||||
|
; order to verify that we have n checks, we look for
|
||||||
|
; (n-1): and not n:.
|
||||||
|
|
||||||
|
; CHECK: Run-time memory checks:
|
||||||
|
; CHECK-NEXT: 0:
|
||||||
|
; CHECK: 11:
|
||||||
|
; CHECK-NOT: 12:
|
||||||
|
|
||||||
|
define void @testf(i16* %a,
|
||||||
|
i16* %b,
|
||||||
|
i16* %c,
|
||||||
|
i16* %d,
|
||||||
|
i16* %e,
|
||||||
|
i16* %f) {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body: ; preds = %for.body, %entry
|
||||||
|
%ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
|
||||||
|
|
||||||
|
%add = add nuw nsw i64 %ind, 1
|
||||||
|
|
||||||
|
%arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
|
||||||
|
%loadA = load i16, i16* %arrayidxA, align 2
|
||||||
|
|
||||||
|
%arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
|
||||||
|
%loadB = load i16, i16* %arrayidxB, align 2
|
||||||
|
|
||||||
|
%arrayidxC = getelementptr inbounds i16, i16* %c, i64 %ind
|
||||||
|
%loadC = load i16, i16* %arrayidxC, align 2
|
||||||
|
|
||||||
|
%mul = mul i16 %loadB, %loadA
|
||||||
|
%mul1 = mul i16 %mul, %loadC
|
||||||
|
|
||||||
|
%arrayidxD = getelementptr inbounds i16, i16* %d, i64 %ind
|
||||||
|
store i16 %mul1, i16* %arrayidxD, align 2
|
||||||
|
|
||||||
|
%arrayidxE = getelementptr inbounds i16, i16* %e, i64 %ind
|
||||||
|
store i16 %mul, i16* %arrayidxE, align 2
|
||||||
|
|
||||||
|
%arrayidxF = getelementptr inbounds i16, i16* %f, i64 %ind
|
||||||
|
store i16 %mul1, i16* %arrayidxF, align 2
|
||||||
|
|
||||||
|
%exitcond = icmp eq i64 %add, 20
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end: ; preds = %for.body
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue