forked from OSchip/llvm-project
[ScopInfo] Add statistics to count loops after scop modeling
llvm-svn: 295431
This commit is contained in:
parent
65ce9362b8
commit
cd01a363d6
|
@ -487,20 +487,9 @@ private:
|
|||
/// a loop is assumed to be profitable and
|
||||
/// consequently is counted.
|
||||
/// returns A tuple of number of loops and their maximal depth.
|
||||
ScopDetection::LoopStats
|
||||
static ScopDetection::LoopStats
|
||||
countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
|
||||
unsigned MinProfitableTrips) const;
|
||||
|
||||
/// Count the number of loops and the maximal loop depth in @p R.
|
||||
///
|
||||
/// @param R The region to check
|
||||
/// @param SE The scalar evolution analysis.
|
||||
/// @param MinProfitableTrips The minimum number of trip counts from which
|
||||
/// a loop is assumed to be profitable and
|
||||
/// consequently is counted.
|
||||
/// returns A tuple of number of loops and their maximal depth.
|
||||
ScopDetection::LoopStats
|
||||
countBeneficialLoops(Region *R, unsigned MinProfitableTrips) const;
|
||||
unsigned MinProfitableTrips);
|
||||
|
||||
/// Check if the function @p F is marked as invalid.
|
||||
///
|
||||
|
@ -615,6 +604,18 @@ public:
|
|||
virtual bool runOnFunction(Function &F);
|
||||
virtual void print(raw_ostream &OS, const Module *) const;
|
||||
//@}
|
||||
|
||||
/// Count the number of loops and the maximal loop depth in @p R.
|
||||
///
|
||||
/// @param R The region to check
|
||||
/// @param SE The scalar evolution analysis.
|
||||
/// @param MinProfitableTrips The minimum number of trip counts from which
|
||||
/// a loop is assumed to be profitable and
|
||||
/// consequently is counted.
|
||||
/// returns A tuple of number of loops and their maximal depth.
|
||||
static ScopDetection::LoopStats
|
||||
countBeneficialLoops(Region *R, ScalarEvolution &SE, LoopInfo &LI,
|
||||
unsigned MinProfitableTrips);
|
||||
};
|
||||
|
||||
} // end namespace polly
|
||||
|
|
|
@ -1132,7 +1132,7 @@ bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const {
|
|||
/// count that is not known to be less than @MinProfitableTrips.
|
||||
ScopDetection::LoopStats
|
||||
ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
|
||||
unsigned MinProfitableTrips) const {
|
||||
unsigned MinProfitableTrips) {
|
||||
auto *TripCount = SE.getBackedgeTakenCount(L);
|
||||
|
||||
int NumLoops = 1;
|
||||
|
@ -1152,22 +1152,22 @@ ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
|
|||
}
|
||||
|
||||
ScopDetection::LoopStats
|
||||
ScopDetection::countBeneficialLoops(Region *R,
|
||||
unsigned MinProfitableTrips) const {
|
||||
ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
|
||||
LoopInfo &LI, unsigned MinProfitableTrips) {
|
||||
int LoopNum = 0;
|
||||
int MaxLoopDepth = 0;
|
||||
|
||||
auto L = LI->getLoopFor(R->getEntry());
|
||||
auto L = LI.getLoopFor(R->getEntry());
|
||||
L = L ? R->outermostLoopInRegion(L) : nullptr;
|
||||
L = L ? L->getParentLoop() : nullptr;
|
||||
|
||||
auto SubLoops =
|
||||
L ? L->getSubLoopsVector() : std::vector<Loop *>(LI->begin(), LI->end());
|
||||
L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());
|
||||
|
||||
for (auto &SubLoop : SubLoops)
|
||||
if (R->contains(SubLoop)) {
|
||||
LoopStats Stats =
|
||||
countBeneficialSubLoops(SubLoop, *SE, MinProfitableTrips);
|
||||
countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
|
||||
LoopNum += Stats.NumLoops;
|
||||
MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
|
||||
}
|
||||
|
@ -1388,7 +1388,8 @@ bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
|
|||
if (!Context.hasStores || !Context.hasLoads)
|
||||
return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
|
||||
|
||||
int NumLoops = countBeneficialLoops(&CurRegion, MIN_LOOP_TRIP_COUNT).NumLoops;
|
||||
int NumLoops =
|
||||
countBeneficialLoops(&CurRegion, *SE, *LI, MIN_LOOP_TRIP_COUNT).NumLoops;
|
||||
int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();
|
||||
|
||||
// Scops with at least two loops may allow either loop fusion or tiling and
|
||||
|
@ -1616,7 +1617,7 @@ bool ScopDetection::runOnFunction(llvm::Function &F) {
|
|||
continue;
|
||||
if (!ValidRegions.count(&DC.CurRegion))
|
||||
continue;
|
||||
LoopStats Stats = countBeneficialLoops(&DC.CurRegion, 0);
|
||||
LoopStats Stats = countBeneficialLoops(&DC.CurRegion, *SE, *LI, 0);
|
||||
updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
|
||||
if (isProfitableRegion(DC)) {
|
||||
updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
|
||||
|
@ -1627,7 +1628,7 @@ bool ScopDetection::runOnFunction(llvm::Function &F) {
|
|||
}
|
||||
|
||||
NumProfScopRegions += ValidRegions.size();
|
||||
NumLoopsOverall += countBeneficialLoops(TopRegion, 0).NumLoops;
|
||||
NumLoopsOverall += countBeneficialLoops(TopRegion, *SE, *LI, 0).NumLoops;
|
||||
|
||||
// Only makes sense when we tracked errors.
|
||||
if (PollyTrackFailures)
|
||||
|
|
|
@ -74,6 +74,16 @@ STATISTIC(AssumptionsInvariantLoad,
|
|||
STATISTIC(AssumptionsDelinearization,
|
||||
"Number of delinearization assumptions taken.");
|
||||
|
||||
STATISTIC(NumLoopsInScop, "Number of loops in scops");
|
||||
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
|
||||
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
|
||||
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
|
||||
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
|
||||
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
|
||||
STATISTIC(NumScopsDepthLarger,
|
||||
"Number of scops with maximal loop depth 6 and larger");
|
||||
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
|
||||
|
||||
// The maximal number of basic sets we allow during domain construction to
|
||||
// be created. More complex scops will result in very high compile time and
|
||||
// are also unlikely to result in good code
|
||||
|
@ -4539,6 +4549,26 @@ void ScopInfoRegionPass::getAnalysisUsage(AnalysisUsage &AU) const {
|
|||
AU.setPreservesAll();
|
||||
}
|
||||
|
||||
void updateLoopCountStatistic(ScopDetection::LoopStats Stats) {
|
||||
NumLoopsInScop += Stats.NumLoops;
|
||||
MaxNumLoopsInScop =
|
||||
std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
|
||||
|
||||
errs() << "MaxLoopDepth: " << Stats.MaxDepth << "\n";
|
||||
if (Stats.MaxDepth == 1)
|
||||
NumScopsDepthOne++;
|
||||
else if (Stats.MaxDepth == 2)
|
||||
NumScopsDepthTwo++;
|
||||
else if (Stats.MaxDepth == 3)
|
||||
NumScopsDepthThree++;
|
||||
else if (Stats.MaxDepth == 4)
|
||||
NumScopsDepthFour++;
|
||||
else if (Stats.MaxDepth == 5)
|
||||
NumScopsDepthFive++;
|
||||
else
|
||||
NumScopsDepthLarger++;
|
||||
}
|
||||
|
||||
bool ScopInfoRegionPass::runOnRegion(Region *R, RGPassManager &RGM) {
|
||||
auto &SD = getAnalysis<ScopDetection>();
|
||||
|
||||
|
@ -4554,6 +4584,13 @@ bool ScopInfoRegionPass::runOnRegion(Region *R, RGPassManager &RGM) {
|
|||
|
||||
ScopBuilder SB(R, AA, DL, DT, LI, SD, SE);
|
||||
S = SB.getScop(); // take ownership of scop object
|
||||
|
||||
if (S) {
|
||||
ScopDetection::LoopStats Stats =
|
||||
ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
|
||||
updateLoopCountStatistic(Stats);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,241 @@
|
|||
; RUN: opt %loadPolly -polly-scops -stats -analyze < %s 2>&1 | FileCheck %s
|
||||
; REQUIRES: asserts
|
||||
|
||||
; CHECK-DAG: 4 polly-scops - Maximal number of loops in scops
|
||||
; CHECK-DAG: 10 polly-scops - Number of loops in scops
|
||||
; CHECK-DAG: 1 polly-scops - Number of scops with maximal loop depth 4
|
||||
; CHECK-DAG: 1 polly-scops - Number of scops with maximal loop depth 1
|
||||
; CHECK-DAG: 1 polly-scops - Number of scops with maximal loop depth 3
|
||||
; CHECK-DAG: 1 polly-scops - Number of scops with maximal loop depth 2
|
||||
; CHECK-DAG: 4 polly-scops - Number of Scops containing a loop
|
||||
; CHECK-DAG: 4 polly-scops - Number of valid Scops
|
||||
;
|
||||
; void foo_1d(float *A) {
|
||||
; for (long i = 0; i < 1024; i++)
|
||||
; A[i] += i;
|
||||
; }
|
||||
;
|
||||
; void foo_2d(float *A) {
|
||||
; for (long i = 0; i < 1024; i++)
|
||||
; for (long j = 0; j < 1024; j++)
|
||||
; A[i + j] += i + j;
|
||||
; }
|
||||
;
|
||||
; void foo_3d(float *A) {
|
||||
; for (long i = 0; i < 1024; i++)
|
||||
; for (long j = 0; j < 1024; j++)
|
||||
; for (long k = 0; k < 1024; k++)
|
||||
; A[i + j + k] += i + j + k;
|
||||
; }
|
||||
;
|
||||
; void foo_4d(float *A) {
|
||||
; for (long i = 0; i < 1024; i++)
|
||||
; for (long j = 0; j < 1024; j++)
|
||||
; for (long k = 0; k < 1024; k++)
|
||||
; for (long l = 0; l < 1024; l++)
|
||||
; A[i + j + k + l] += i + j + k + l;
|
||||
; }
|
||||
;
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
define void @foo_1d(float* %A) {
|
||||
bb:
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb6, %bb
|
||||
%i.0 = phi i64 [ 0, %bb ], [ %tmp7, %bb6 ]
|
||||
%exitcond = icmp ne i64 %i.0, 1024
|
||||
br i1 %exitcond, label %bb2, label %bb8
|
||||
|
||||
bb2: ; preds = %bb1
|
||||
%tmp = sitofp i64 %i.0 to float
|
||||
%tmp3 = getelementptr inbounds float, float* %A, i64 %i.0
|
||||
%tmp4 = load float, float* %tmp3, align 4
|
||||
%tmp5 = fadd float %tmp4, %tmp
|
||||
store float %tmp5, float* %tmp3, align 4
|
||||
br label %bb6
|
||||
|
||||
bb6: ; preds = %bb2
|
||||
%tmp7 = add nuw nsw i64 %i.0, 1
|
||||
br label %bb1
|
||||
|
||||
bb8: ; preds = %bb1
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @foo_2d(float* %A) {
|
||||
bb:
|
||||
br label %bb2
|
||||
|
||||
bb2: ; preds = %bb14, %bb
|
||||
%i.0 = phi i64 [ 0, %bb ], [ %tmp15, %bb14 ]
|
||||
%exitcond1 = icmp ne i64 %i.0, 1024
|
||||
br i1 %exitcond1, label %bb3, label %bb16
|
||||
|
||||
bb3: ; preds = %bb2
|
||||
br label %bb4
|
||||
|
||||
bb4: ; preds = %bb11, %bb3
|
||||
%j.0 = phi i64 [ 0, %bb3 ], [ %tmp12, %bb11 ]
|
||||
%exitcond = icmp ne i64 %j.0, 1024
|
||||
br i1 %exitcond, label %bb5, label %bb13
|
||||
|
||||
bb5: ; preds = %bb4
|
||||
%tmp = add nuw nsw i64 %i.0, %j.0
|
||||
%tmp6 = sitofp i64 %tmp to float
|
||||
%tmp7 = add nuw nsw i64 %i.0, %j.0
|
||||
%tmp8 = getelementptr inbounds float, float* %A, i64 %tmp7
|
||||
%tmp9 = load float, float* %tmp8, align 4
|
||||
%tmp10 = fadd float %tmp9, %tmp6
|
||||
store float %tmp10, float* %tmp8, align 4
|
||||
br label %bb11
|
||||
|
||||
bb11: ; preds = %bb5
|
||||
%tmp12 = add nuw nsw i64 %j.0, 1
|
||||
br label %bb4
|
||||
|
||||
bb13: ; preds = %bb4
|
||||
br label %bb14
|
||||
|
||||
bb14: ; preds = %bb13
|
||||
%tmp15 = add nuw nsw i64 %i.0, 1
|
||||
br label %bb2
|
||||
|
||||
bb16: ; preds = %bb2
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @foo_3d(float* %A) {
|
||||
bb:
|
||||
br label %bb3
|
||||
|
||||
bb3: ; preds = %bb22, %bb
|
||||
%i.0 = phi i64 [ 0, %bb ], [ %tmp23, %bb22 ]
|
||||
%exitcond2 = icmp ne i64 %i.0, 1024
|
||||
br i1 %exitcond2, label %bb4, label %bb24
|
||||
|
||||
bb4: ; preds = %bb3
|
||||
br label %bb5
|
||||
|
||||
bb5: ; preds = %bb19, %bb4
|
||||
%j.0 = phi i64 [ 0, %bb4 ], [ %tmp20, %bb19 ]
|
||||
%exitcond1 = icmp ne i64 %j.0, 1024
|
||||
br i1 %exitcond1, label %bb6, label %bb21
|
||||
|
||||
bb6: ; preds = %bb5
|
||||
br label %bb7
|
||||
|
||||
bb7: ; preds = %bb16, %bb6
|
||||
%k.0 = phi i64 [ 0, %bb6 ], [ %tmp17, %bb16 ]
|
||||
%exitcond = icmp ne i64 %k.0, 1024
|
||||
br i1 %exitcond, label %bb8, label %bb18
|
||||
|
||||
bb8: ; preds = %bb7
|
||||
%tmp = add nuw nsw i64 %i.0, %j.0
|
||||
%tmp9 = add nuw nsw i64 %tmp, %k.0
|
||||
%tmp10 = sitofp i64 %tmp9 to float
|
||||
%tmp11 = add nuw nsw i64 %i.0, %j.0
|
||||
%tmp12 = add nuw nsw i64 %tmp11, %k.0
|
||||
%tmp13 = getelementptr inbounds float, float* %A, i64 %tmp12
|
||||
%tmp14 = load float, float* %tmp13, align 4
|
||||
%tmp15 = fadd float %tmp14, %tmp10
|
||||
store float %tmp15, float* %tmp13, align 4
|
||||
br label %bb16
|
||||
|
||||
bb16: ; preds = %bb8
|
||||
%tmp17 = add nuw nsw i64 %k.0, 1
|
||||
br label %bb7
|
||||
|
||||
bb18: ; preds = %bb7
|
||||
br label %bb19
|
||||
|
||||
bb19: ; preds = %bb18
|
||||
%tmp20 = add nuw nsw i64 %j.0, 1
|
||||
br label %bb5
|
||||
|
||||
bb21: ; preds = %bb5
|
||||
br label %bb22
|
||||
|
||||
bb22: ; preds = %bb21
|
||||
%tmp23 = add nuw nsw i64 %i.0, 1
|
||||
br label %bb3
|
||||
|
||||
bb24: ; preds = %bb3
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @foo_4d(float* %A) {
|
||||
bb:
|
||||
br label %bb4
|
||||
|
||||
bb4: ; preds = %bb30, %bb
|
||||
%i.0 = phi i64 [ 0, %bb ], [ %tmp31, %bb30 ]
|
||||
%exitcond3 = icmp ne i64 %i.0, 1024
|
||||
br i1 %exitcond3, label %bb5, label %bb32
|
||||
|
||||
bb5: ; preds = %bb4
|
||||
br label %bb6
|
||||
|
||||
bb6: ; preds = %bb27, %bb5
|
||||
%j.0 = phi i64 [ 0, %bb5 ], [ %tmp28, %bb27 ]
|
||||
%exitcond2 = icmp ne i64 %j.0, 1024
|
||||
br i1 %exitcond2, label %bb7, label %bb29
|
||||
|
||||
bb7: ; preds = %bb6
|
||||
br label %bb8
|
||||
|
||||
bb8: ; preds = %bb24, %bb7
|
||||
%k.0 = phi i64 [ 0, %bb7 ], [ %tmp25, %bb24 ]
|
||||
%exitcond1 = icmp ne i64 %k.0, 1024
|
||||
br i1 %exitcond1, label %bb9, label %bb26
|
||||
|
||||
bb9: ; preds = %bb8
|
||||
br label %bb10
|
||||
|
||||
bb10: ; preds = %bb21, %bb9
|
||||
%l.0 = phi i64 [ 0, %bb9 ], [ %tmp22, %bb21 ]
|
||||
%exitcond = icmp ne i64 %l.0, 1024
|
||||
br i1 %exitcond, label %bb11, label %bb23
|
||||
|
||||
bb11: ; preds = %bb10
|
||||
%tmp = add nuw nsw i64 %i.0, %j.0
|
||||
%tmp12 = add nuw nsw i64 %tmp, %k.0
|
||||
%tmp13 = add nuw nsw i64 %tmp12, %l.0
|
||||
%tmp14 = sitofp i64 %tmp13 to float
|
||||
%tmp15 = add nuw nsw i64 %i.0, %j.0
|
||||
%tmp16 = add nuw nsw i64 %tmp15, %k.0
|
||||
%tmp17 = add nuw nsw i64 %tmp16, %l.0
|
||||
%tmp18 = getelementptr inbounds float, float* %A, i64 %tmp17
|
||||
%tmp19 = load float, float* %tmp18, align 4
|
||||
%tmp20 = fadd float %tmp19, %tmp14
|
||||
store float %tmp20, float* %tmp18, align 4
|
||||
br label %bb21
|
||||
|
||||
bb21: ; preds = %bb11
|
||||
%tmp22 = add nuw nsw i64 %l.0, 1
|
||||
br label %bb10
|
||||
|
||||
bb23: ; preds = %bb10
|
||||
br label %bb24
|
||||
|
||||
bb24: ; preds = %bb23
|
||||
%tmp25 = add nuw nsw i64 %k.0, 1
|
||||
br label %bb8
|
||||
|
||||
bb26: ; preds = %bb8
|
||||
br label %bb27
|
||||
|
||||
bb27: ; preds = %bb26
|
||||
%tmp28 = add nuw nsw i64 %j.0, 1
|
||||
br label %bb6
|
||||
|
||||
bb29: ; preds = %bb6
|
||||
br label %bb30
|
||||
|
||||
bb30: ; preds = %bb29
|
||||
%tmp31 = add nuw nsw i64 %i.0, 1
|
||||
br label %bb4
|
||||
|
||||
bb32: ; preds = %bb4
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue