forked from OSchip/llvm-project
[ScheduleOptimizer] Allow tiling after fusion
In ScheduleOptimizer::isTileableBand(), allow the case in which the band node's child is an isl_schedule_sequence_node and its grandchildren isl_schedule_leaf_nodes. This case can arise when two or more statements are fused by the isl scheduler. The tile_after_fusion.ll test has two statements in separate loop nests and checks whether they are tiled after being fused when polly-opt-fusion equals "max". Reviewers: grosser Subscribers: gareevroman, pollydev Tags: #polly Contributed-by: Theodoros Theodoridis <theodort@student.ethz.ch> Differential Revision: https://reviews.llvm.org/D30815 llvm-svn: 297587
This commit is contained in:
parent
f06b963a2b
commit
c9d4cb2f42
|
@ -433,6 +433,34 @@ ScheduleTreeOptimizer::applyRegisterTiling(__isl_take isl_schedule_node *Node,
|
|||
return Node;
|
||||
}
|
||||
|
||||
namespace {
|
||||
bool isSimpleInnermostBand(const isl::schedule_node &Node) {
|
||||
assert(isl_schedule_node_get_type(Node.keep()) == isl_schedule_node_band);
|
||||
assert(isl_schedule_node_n_children(Node.keep()) == 1);
|
||||
|
||||
auto ChildType = isl_schedule_node_get_type(Node.child(0).keep());
|
||||
|
||||
if (ChildType == isl_schedule_node_leaf)
|
||||
return true;
|
||||
|
||||
if (ChildType != isl_schedule_node_sequence)
|
||||
return false;
|
||||
|
||||
auto Sequence = Node.child(0);
|
||||
|
||||
for (int c = 0, nc = isl_schedule_node_n_children(Sequence.keep()); c < nc;
|
||||
++c) {
|
||||
auto Child = Sequence.child(c);
|
||||
if (isl_schedule_node_get_type(Child.keep()) != isl_schedule_node_filter)
|
||||
return false;
|
||||
if (isl_schedule_node_get_type(Child.child(0).keep()) !=
|
||||
isl_schedule_node_leaf)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
bool ScheduleTreeOptimizer::isTileableBandNode(
|
||||
__isl_keep isl_schedule_node *Node) {
|
||||
if (isl_schedule_node_get_type(Node) != isl_schedule_node_band)
|
||||
|
@ -451,14 +479,8 @@ bool ScheduleTreeOptimizer::isTileableBandNode(
|
|||
if (Dims <= 1)
|
||||
return false;
|
||||
|
||||
auto Child = isl_schedule_node_get_child(Node, 0);
|
||||
auto Type = isl_schedule_node_get_type(Child);
|
||||
isl_schedule_node_free(Child);
|
||||
|
||||
if (Type != isl_schedule_node_leaf)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
auto ManagedNode = isl::manage(isl_schedule_node_copy(Node));
|
||||
return isSimpleInnermostBand(ManagedNode);
|
||||
}
|
||||
|
||||
__isl_give isl_schedule_node *
|
||||
|
|
|
@ -0,0 +1,139 @@
|
|||
; RUN: opt %loadPolly -polly-opt-isl -polly-ast -polly-opt-fusion=max -analyze < %s | FileCheck %s
|
||||
;
|
||||
;
|
||||
; void tf(int C[256][256][256], int A0[256][256][256], int A1[256][256][256]) {
|
||||
; for (int i = 0; i < 256; ++i)
|
||||
; for (int j = 0; j < 256; ++j)
|
||||
; for (int k = 0; k < 256; ++k)
|
||||
; C[i][j][k] += A0[i][j][k];
|
||||
;
|
||||
; for (int i = 0; i < 256; ++i)
|
||||
; for (int j = 0; j < 256; ++j)
|
||||
; for (int k = 0; k < 256; ++k)
|
||||
; C[i][j][k] += A1[i][j][k];
|
||||
; }
|
||||
;
|
||||
; The tile_after_fusion.ll test has two statements in separate loop nests and
|
||||
; checks whether they are tiled after being fused when polly-opt-fusion equals
|
||||
; "max".
|
||||
;
|
||||
; CHECK: 1st level tiling - Tiles
|
||||
; CHECK-NEXT: for (int c0 = 0; c0 <= 7; c0 += 1)
|
||||
; CHECK-NEXT: for (int c1 = 0; c1 <= 7; c1 += 1)
|
||||
; CHECK-NEXT: for (int c2 = 0; c2 <= 7; c2 += 1) {
|
||||
; CHECK-NEXT: // 1st level tiling - Points
|
||||
; CHECK-NEXT: for (int c3 = 0; c3 <= 31; c3 += 1)
|
||||
; CHECK-NEXT: for (int c4 = 0; c4 <= 31; c4 += 1)
|
||||
; CHECK-NEXT: for (int c5 = 0; c5 <= 31; c5 += 1) {
|
||||
; CHECK-NEXT: Stmt_for_body6(32 * c0 + c3, 32 * c1 + c4, 32 * c2 + c5);
|
||||
; CHECK-NEXT: Stmt_for_body34(32 * c0 + c3, 32 * c1 + c4, 32 * c2 + c5);
|
||||
|
||||
source_filename = "tile_after_fusion.c"
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
define void @tf([256 x [256 x i32]]* %C, [256 x [256 x i32]]* %A0, [256 x [256 x i32]]* %A1) {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc20, %entry
|
||||
%indvars.iv13 = phi i64 [ %indvars.iv.next14, %for.inc20 ], [ 0, %entry ]
|
||||
%exitcond15 = icmp ne i64 %indvars.iv13, 256
|
||||
br i1 %exitcond15, label %for.body, label %for.end22
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
br label %for.cond1
|
||||
|
||||
for.cond1: ; preds = %for.inc17, %for.body
|
||||
%indvars.iv10 = phi i64 [ %indvars.iv.next11, %for.inc17 ], [ 0, %for.body ]
|
||||
%exitcond12 = icmp ne i64 %indvars.iv10, 256
|
||||
br i1 %exitcond12, label %for.body3, label %for.end19
|
||||
|
||||
for.body3: ; preds = %for.cond1
|
||||
br label %for.cond4
|
||||
|
||||
for.cond4: ; preds = %for.inc, %for.body3
|
||||
%indvars.iv7 = phi i64 [ %indvars.iv.next8, %for.inc ], [ 0, %for.body3 ]
|
||||
%exitcond9 = icmp ne i64 %indvars.iv7, 256
|
||||
br i1 %exitcond9, label %for.body6, label %for.end
|
||||
|
||||
for.body6: ; preds = %for.cond4
|
||||
%arrayidx10 = getelementptr inbounds [256 x [256 x i32]], [256 x [256 x i32]]* %A0, i64 %indvars.iv13, i64 %indvars.iv10, i64 %indvars.iv7
|
||||
%tmp = load i32, i32* %arrayidx10, align 4
|
||||
%arrayidx16 = getelementptr inbounds [256 x [256 x i32]], [256 x [256 x i32]]* %C, i64 %indvars.iv13, i64 %indvars.iv10, i64 %indvars.iv7
|
||||
%tmp16 = load i32, i32* %arrayidx16, align 4
|
||||
%add = add nsw i32 %tmp16, %tmp
|
||||
store i32 %add, i32* %arrayidx16, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body6
|
||||
%indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1
|
||||
br label %for.cond4
|
||||
|
||||
for.end: ; preds = %for.cond4
|
||||
br label %for.inc17
|
||||
|
||||
for.inc17: ; preds = %for.end
|
||||
%indvars.iv.next11 = add nuw nsw i64 %indvars.iv10, 1
|
||||
br label %for.cond1
|
||||
|
||||
for.end19: ; preds = %for.cond1
|
||||
br label %for.inc20
|
||||
|
||||
for.inc20: ; preds = %for.end19
|
||||
%indvars.iv.next14 = add nuw nsw i64 %indvars.iv13, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end22: ; preds = %for.cond
|
||||
br label %for.cond24
|
||||
|
||||
for.cond24: ; preds = %for.inc54, %for.end22
|
||||
%indvars.iv4 = phi i64 [ %indvars.iv.next5, %for.inc54 ], [ 0, %for.end22 ]
|
||||
%exitcond6 = icmp ne i64 %indvars.iv4, 256
|
||||
br i1 %exitcond6, label %for.body26, label %for.end56
|
||||
|
||||
for.body26: ; preds = %for.cond24
|
||||
br label %for.cond28
|
||||
|
||||
for.cond28: ; preds = %for.inc51, %for.body26
|
||||
%indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc51 ], [ 0, %for.body26 ]
|
||||
%exitcond3 = icmp ne i64 %indvars.iv1, 256
|
||||
br i1 %exitcond3, label %for.body30, label %for.end53
|
||||
|
||||
for.body30: ; preds = %for.cond28
|
||||
br label %for.cond32
|
||||
|
||||
for.cond32: ; preds = %for.inc48, %for.body30
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc48 ], [ 0, %for.body30 ]
|
||||
%exitcond = icmp ne i64 %indvars.iv, 256
|
||||
br i1 %exitcond, label %for.body34, label %for.end50
|
||||
|
||||
for.body34: ; preds = %for.cond32
|
||||
%arrayidx40 = getelementptr inbounds [256 x [256 x i32]], [256 x [256 x i32]]* %A1, i64 %indvars.iv4, i64 %indvars.iv1, i64 %indvars.iv
|
||||
%tmp17 = load i32, i32* %arrayidx40, align 4
|
||||
%arrayidx46 = getelementptr inbounds [256 x [256 x i32]], [256 x [256 x i32]]* %C, i64 %indvars.iv4, i64 %indvars.iv1, i64 %indvars.iv
|
||||
%tmp18 = load i32, i32* %arrayidx46, align 4
|
||||
%add47 = add nsw i32 %tmp18, %tmp17
|
||||
store i32 %add47, i32* %arrayidx46, align 4
|
||||
br label %for.inc48
|
||||
|
||||
for.inc48: ; preds = %for.body34
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
br label %for.cond32
|
||||
|
||||
for.end50: ; preds = %for.cond32
|
||||
br label %for.inc51
|
||||
|
||||
for.inc51: ; preds = %for.end50
|
||||
%indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1
|
||||
br label %for.cond28
|
||||
|
||||
for.end53: ; preds = %for.cond28
|
||||
br label %for.inc54
|
||||
|
||||
for.inc54: ; preds = %for.end53
|
||||
%indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
|
||||
br label %for.cond24
|
||||
|
||||
for.end56: ; preds = %for.cond24
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue