[ScopInfo] Add statistics to count loops after scop modeling

llvm-svn: 295431
This commit is contained in:
Tobias Grosser 2017-02-17 08:12:36 +00:00
parent 65ce9362b8
commit cd01a363d6
4 changed files with 302 additions and 22 deletions

View File

@ -487,20 +487,9 @@ private:
/// a loop is assumed to be profitable and
/// consequently is counted.
/// returns A tuple of number of loops and their maximal depth.
ScopDetection::LoopStats
static ScopDetection::LoopStats
countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
unsigned MinProfitableTrips) const;
/// Count the number of loops and the maximal loop depth in @p R.
///
/// @param R The region to check
/// @param SE The scalar evolution analysis.
/// @param MinProfitableTrips The minimum number of trip counts from which
/// a loop is assumed to be profitable and
/// consequently is counted.
/// returns A tuple of number of loops and their maximal depth.
ScopDetection::LoopStats
countBeneficialLoops(Region *R, unsigned MinProfitableTrips) const;
unsigned MinProfitableTrips);
/// Check if the function @p F is marked as invalid.
///
@ -615,6 +604,18 @@ public:
virtual bool runOnFunction(Function &F);
virtual void print(raw_ostream &OS, const Module *) const;
//@}
/// Count the number of loops and the maximal loop depth in @p R.
///
/// @param R The region to check
/// @param SE The scalar evolution analysis.
/// @param MinProfitableTrips The minimum number of trip counts from which
/// a loop is assumed to be profitable and
/// consequently is counted.
/// returns A tuple of number of loops and their maximal depth.
static ScopDetection::LoopStats
countBeneficialLoops(Region *R, ScalarEvolution &SE, LoopInfo &LI,
unsigned MinProfitableTrips);
};
} // end namespace polly

View File

@ -1132,7 +1132,7 @@ bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const {
/// count that is not known to be less than @MinProfitableTrips.
ScopDetection::LoopStats
ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
unsigned MinProfitableTrips) const {
unsigned MinProfitableTrips) {
auto *TripCount = SE.getBackedgeTakenCount(L);
int NumLoops = 1;
@ -1152,22 +1152,22 @@ ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
}
ScopDetection::LoopStats
ScopDetection::countBeneficialLoops(Region *R,
unsigned MinProfitableTrips) const {
ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
LoopInfo &LI, unsigned MinProfitableTrips) {
int LoopNum = 0;
int MaxLoopDepth = 0;
auto L = LI->getLoopFor(R->getEntry());
auto L = LI.getLoopFor(R->getEntry());
L = L ? R->outermostLoopInRegion(L) : nullptr;
L = L ? L->getParentLoop() : nullptr;
auto SubLoops =
L ? L->getSubLoopsVector() : std::vector<Loop *>(LI->begin(), LI->end());
L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());
for (auto &SubLoop : SubLoops)
if (R->contains(SubLoop)) {
LoopStats Stats =
countBeneficialSubLoops(SubLoop, *SE, MinProfitableTrips);
countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
LoopNum += Stats.NumLoops;
MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
}
@ -1388,7 +1388,8 @@ bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
if (!Context.hasStores || !Context.hasLoads)
return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
int NumLoops = countBeneficialLoops(&CurRegion, MIN_LOOP_TRIP_COUNT).NumLoops;
int NumLoops =
countBeneficialLoops(&CurRegion, *SE, *LI, MIN_LOOP_TRIP_COUNT).NumLoops;
int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();
// Scops with at least two loops may allow either loop fusion or tiling and
@ -1616,7 +1617,7 @@ bool ScopDetection::runOnFunction(llvm::Function &F) {
continue;
if (!ValidRegions.count(&DC.CurRegion))
continue;
LoopStats Stats = countBeneficialLoops(&DC.CurRegion, 0);
LoopStats Stats = countBeneficialLoops(&DC.CurRegion, *SE, *LI, 0);
updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
if (isProfitableRegion(DC)) {
updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
@ -1627,7 +1628,7 @@ bool ScopDetection::runOnFunction(llvm::Function &F) {
}
NumProfScopRegions += ValidRegions.size();
NumLoopsOverall += countBeneficialLoops(TopRegion, 0).NumLoops;
NumLoopsOverall += countBeneficialLoops(TopRegion, *SE, *LI, 0).NumLoops;
// Only makes sense when we tracked errors.
if (PollyTrackFailures)

View File

@ -74,6 +74,16 @@ STATISTIC(AssumptionsInvariantLoad,
STATISTIC(AssumptionsDelinearization,
"Number of delinearization assumptions taken.");
STATISTIC(NumLoopsInScop, "Number of loops in scops");
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
STATISTIC(NumScopsDepthLarger,
"Number of scops with maximal loop depth 6 and larger");
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
// The maximal number of basic sets we allow during domain construction to
// be created. More complex scops will result in very high compile time and
// are also unlikely to result in good code
@ -4539,6 +4549,26 @@ void ScopInfoRegionPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
void updateLoopCountStatistic(ScopDetection::LoopStats Stats) {
NumLoopsInScop += Stats.NumLoops;
MaxNumLoopsInScop =
std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
errs() << "MaxLoopDepth: " << Stats.MaxDepth << "\n";
if (Stats.MaxDepth == 1)
NumScopsDepthOne++;
else if (Stats.MaxDepth == 2)
NumScopsDepthTwo++;
else if (Stats.MaxDepth == 3)
NumScopsDepthThree++;
else if (Stats.MaxDepth == 4)
NumScopsDepthFour++;
else if (Stats.MaxDepth == 5)
NumScopsDepthFive++;
else
NumScopsDepthLarger++;
}
bool ScopInfoRegionPass::runOnRegion(Region *R, RGPassManager &RGM) {
auto &SD = getAnalysis<ScopDetection>();
@ -4554,6 +4584,13 @@ bool ScopInfoRegionPass::runOnRegion(Region *R, RGPassManager &RGM) {
ScopBuilder SB(R, AA, DL, DT, LI, SD, SE);
S = SB.getScop(); // take ownership of scop object
if (S) {
ScopDetection::LoopStats Stats =
ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
updateLoopCountStatistic(Stats);
}
return false;
}

View File

@ -0,0 +1,241 @@
; RUN: opt %loadPolly -polly-scops -stats -analyze < %s 2>&1 | FileCheck %s
; REQUIRES: asserts
; CHECK-DAG: 4 polly-scops - Maximal number of loops in scops
; CHECK-DAG: 10 polly-scops - Number of loops in scops
; CHECK-DAG: 1 polly-scops - Number of scops with maximal loop depth 4
; CHECK-DAG: 1 polly-scops - Number of scops with maximal loop depth 1
; CHECK-DAG: 1 polly-scops - Number of scops with maximal loop depth 3
; CHECK-DAG: 1 polly-scops - Number of scops with maximal loop depth 2
; CHECK-DAG: 4 polly-scops - Number of Scops containing a loop
; CHECK-DAG: 4 polly-scops - Number of valid Scops
;
; void foo_1d(float *A) {
; for (long i = 0; i < 1024; i++)
; A[i] += i;
; }
;
; void foo_2d(float *A) {
; for (long i = 0; i < 1024; i++)
; for (long j = 0; j < 1024; j++)
; A[i + j] += i + j;
; }
;
; void foo_3d(float *A) {
; for (long i = 0; i < 1024; i++)
; for (long j = 0; j < 1024; j++)
; for (long k = 0; k < 1024; k++)
; A[i + j + k] += i + j + k;
; }
;
; void foo_4d(float *A) {
; for (long i = 0; i < 1024; i++)
; for (long j = 0; j < 1024; j++)
; for (long k = 0; k < 1024; k++)
; for (long l = 0; l < 1024; l++)
; A[i + j + k + l] += i + j + k + l;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @foo_1d(float* %A) {
bb:
br label %bb1
bb1: ; preds = %bb6, %bb
%i.0 = phi i64 [ 0, %bb ], [ %tmp7, %bb6 ]
%exitcond = icmp ne i64 %i.0, 1024
br i1 %exitcond, label %bb2, label %bb8
bb2: ; preds = %bb1
%tmp = sitofp i64 %i.0 to float
%tmp3 = getelementptr inbounds float, float* %A, i64 %i.0
%tmp4 = load float, float* %tmp3, align 4
%tmp5 = fadd float %tmp4, %tmp
store float %tmp5, float* %tmp3, align 4
br label %bb6
bb6: ; preds = %bb2
%tmp7 = add nuw nsw i64 %i.0, 1
br label %bb1
bb8: ; preds = %bb1
ret void
}
define void @foo_2d(float* %A) {
bb:
br label %bb2
bb2: ; preds = %bb14, %bb
%i.0 = phi i64 [ 0, %bb ], [ %tmp15, %bb14 ]
%exitcond1 = icmp ne i64 %i.0, 1024
br i1 %exitcond1, label %bb3, label %bb16
bb3: ; preds = %bb2
br label %bb4
bb4: ; preds = %bb11, %bb3
%j.0 = phi i64 [ 0, %bb3 ], [ %tmp12, %bb11 ]
%exitcond = icmp ne i64 %j.0, 1024
br i1 %exitcond, label %bb5, label %bb13
bb5: ; preds = %bb4
%tmp = add nuw nsw i64 %i.0, %j.0
%tmp6 = sitofp i64 %tmp to float
%tmp7 = add nuw nsw i64 %i.0, %j.0
%tmp8 = getelementptr inbounds float, float* %A, i64 %tmp7
%tmp9 = load float, float* %tmp8, align 4
%tmp10 = fadd float %tmp9, %tmp6
store float %tmp10, float* %tmp8, align 4
br label %bb11
bb11: ; preds = %bb5
%tmp12 = add nuw nsw i64 %j.0, 1
br label %bb4
bb13: ; preds = %bb4
br label %bb14
bb14: ; preds = %bb13
%tmp15 = add nuw nsw i64 %i.0, 1
br label %bb2
bb16: ; preds = %bb2
ret void
}
define void @foo_3d(float* %A) {
bb:
br label %bb3
bb3: ; preds = %bb22, %bb
%i.0 = phi i64 [ 0, %bb ], [ %tmp23, %bb22 ]
%exitcond2 = icmp ne i64 %i.0, 1024
br i1 %exitcond2, label %bb4, label %bb24
bb4: ; preds = %bb3
br label %bb5
bb5: ; preds = %bb19, %bb4
%j.0 = phi i64 [ 0, %bb4 ], [ %tmp20, %bb19 ]
%exitcond1 = icmp ne i64 %j.0, 1024
br i1 %exitcond1, label %bb6, label %bb21
bb6: ; preds = %bb5
br label %bb7
bb7: ; preds = %bb16, %bb6
%k.0 = phi i64 [ 0, %bb6 ], [ %tmp17, %bb16 ]
%exitcond = icmp ne i64 %k.0, 1024
br i1 %exitcond, label %bb8, label %bb18
bb8: ; preds = %bb7
%tmp = add nuw nsw i64 %i.0, %j.0
%tmp9 = add nuw nsw i64 %tmp, %k.0
%tmp10 = sitofp i64 %tmp9 to float
%tmp11 = add nuw nsw i64 %i.0, %j.0
%tmp12 = add nuw nsw i64 %tmp11, %k.0
%tmp13 = getelementptr inbounds float, float* %A, i64 %tmp12
%tmp14 = load float, float* %tmp13, align 4
%tmp15 = fadd float %tmp14, %tmp10
store float %tmp15, float* %tmp13, align 4
br label %bb16
bb16: ; preds = %bb8
%tmp17 = add nuw nsw i64 %k.0, 1
br label %bb7
bb18: ; preds = %bb7
br label %bb19
bb19: ; preds = %bb18
%tmp20 = add nuw nsw i64 %j.0, 1
br label %bb5
bb21: ; preds = %bb5
br label %bb22
bb22: ; preds = %bb21
%tmp23 = add nuw nsw i64 %i.0, 1
br label %bb3
bb24: ; preds = %bb3
ret void
}
define void @foo_4d(float* %A) {
bb:
br label %bb4
bb4: ; preds = %bb30, %bb
%i.0 = phi i64 [ 0, %bb ], [ %tmp31, %bb30 ]
%exitcond3 = icmp ne i64 %i.0, 1024
br i1 %exitcond3, label %bb5, label %bb32
bb5: ; preds = %bb4
br label %bb6
bb6: ; preds = %bb27, %bb5
%j.0 = phi i64 [ 0, %bb5 ], [ %tmp28, %bb27 ]
%exitcond2 = icmp ne i64 %j.0, 1024
br i1 %exitcond2, label %bb7, label %bb29
bb7: ; preds = %bb6
br label %bb8
bb8: ; preds = %bb24, %bb7
%k.0 = phi i64 [ 0, %bb7 ], [ %tmp25, %bb24 ]
%exitcond1 = icmp ne i64 %k.0, 1024
br i1 %exitcond1, label %bb9, label %bb26
bb9: ; preds = %bb8
br label %bb10
bb10: ; preds = %bb21, %bb9
%l.0 = phi i64 [ 0, %bb9 ], [ %tmp22, %bb21 ]
%exitcond = icmp ne i64 %l.0, 1024
br i1 %exitcond, label %bb11, label %bb23
bb11: ; preds = %bb10
%tmp = add nuw nsw i64 %i.0, %j.0
%tmp12 = add nuw nsw i64 %tmp, %k.0
%tmp13 = add nuw nsw i64 %tmp12, %l.0
%tmp14 = sitofp i64 %tmp13 to float
%tmp15 = add nuw nsw i64 %i.0, %j.0
%tmp16 = add nuw nsw i64 %tmp15, %k.0
%tmp17 = add nuw nsw i64 %tmp16, %l.0
%tmp18 = getelementptr inbounds float, float* %A, i64 %tmp17
%tmp19 = load float, float* %tmp18, align 4
%tmp20 = fadd float %tmp19, %tmp14
store float %tmp20, float* %tmp18, align 4
br label %bb21
bb21: ; preds = %bb11
%tmp22 = add nuw nsw i64 %l.0, 1
br label %bb10
bb23: ; preds = %bb10
br label %bb24
bb24: ; preds = %bb23
%tmp25 = add nuw nsw i64 %k.0, 1
br label %bb8
bb26: ; preds = %bb8
br label %bb27
bb27: ; preds = %bb26
%tmp28 = add nuw nsw i64 %j.0, 1
br label %bb6
bb29: ; preds = %bb6
br label %bb30
bb30: ; preds = %bb29
%tmp31 = add nuw nsw i64 %i.0, 1
br label %bb4
bb32: ; preds = %bb4
ret void
}