forked from OSchip/llvm-project
Loop unroller: set thresholds for optsize and minsize functions to zero
Before r268509, Clang would disable the loop unroll pass when optimizing for size. That commit enabled the pass so that unroll pragmas would be supported in -Os builds. However, this regressed the binary size of one of Chromium's DLLs by ~100 KB. This commit restores the original behaviour of no unrolling at -Os, but does so in LLVM instead of Clang, which makes more sense and also allows the pragmas to keep working. Differential revision: http://reviews.llvm.org/D20115 llvm-svn: 269124
This commit is contained in:
parent
12de4aeeb3
commit
719b26ba54
|
@ -111,9 +111,9 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
|
|||
UP.Threshold = 150;
|
||||
UP.PercentDynamicCostSavedThreshold = 20;
|
||||
UP.DynamicCostSavingsDiscount = 2000;
|
||||
UP.OptSizeThreshold = 50;
|
||||
UP.OptSizeThreshold = 0;
|
||||
UP.PartialThreshold = UP.Threshold;
|
||||
UP.PartialOptSizeThreshold = UP.OptSizeThreshold;
|
||||
UP.PartialOptSizeThreshold = 0;
|
||||
UP.Count = 0;
|
||||
UP.MaxCount = UINT_MAX;
|
||||
UP.FullUnrollMaxCount = UINT_MAX;
|
||||
|
|
|
@ -1,30 +1,5 @@
|
|||
; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s -check-prefix=EPILOG
|
||||
; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
|
||||
define void @unroll_opt_for_size() nounwind optsize {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
|
||||
%inc = add i32 %iv, 1
|
||||
%exitcnd = icmp uge i32 %inc, 1024
|
||||
br i1 %exitcnd, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; EPILOG-LABEL: @unroll_opt_for_size
|
||||
; EPILOG: add
|
||||
; EPILOG-NEXT: add
|
||||
; EPILOG-NEXT: add
|
||||
; EPILOG: icmp
|
||||
|
||||
; PROLOG-LABEL: @unroll_opt_for_size
|
||||
; PROLOG: add
|
||||
; PROLOG-NEXT: add
|
||||
; PROLOG-NEXT: add
|
||||
; PROLOG: icmp
|
||||
|
||||
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
|
||||
entry:
|
||||
|
|
|
@ -1,53 +1,4 @@
|
|||
; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -loop-unroll | FileCheck %s
|
||||
define void @unroll_opt_for_size() nounwind optsize {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
|
||||
%inc = add i32 %iv, 1
|
||||
%exitcnd = icmp uge i32 %inc, 1024
|
||||
br i1 %exitcnd, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @unroll_opt_for_size
|
||||
; CHECK: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: icmp
|
||||
|
||||
define void @unroll_default() nounwind {
|
||||
entry:
|
||||
br label %loop
|
||||
|
|
|
@ -1,53 +0,0 @@
|
|||
; RUN: opt < %s -S -loop-unroll -unroll-allow-partial | FileCheck %s
|
||||
; RUN: sed -e 's/optsize/minsize/' %s | opt -S -loop-unroll -unroll-allow-partial | FileCheck %s
|
||||
|
||||
; Loop size = 3, when the function has the optsize attribute, the
|
||||
; OptSizeUnrollThreshold, i.e. 50, is used, hence the loop should be unrolled
|
||||
; by 32 times because (1 * 32) + 2 < 50 (whereas (1 * 64 + 2) is not).
|
||||
define void @unroll_opt_for_size() nounwind optsize {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
|
||||
%inc = add i32 %iv, 1
|
||||
%exitcnd = icmp uge i32 %inc, 1024
|
||||
br i1 %exitcnd, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: icmp
|
|
@ -4,10 +4,10 @@
|
|||
|
||||
;///////////////////// TEST 1 //////////////////////////////
|
||||
|
||||
; This test shows that with optsize attribute, the loop is unrolled
|
||||
; according to the specified unroll factor.
|
||||
; This test shows that the loop is unrolled according to the specified
|
||||
; unroll factor.
|
||||
|
||||
define void @Test1() nounwind optsize {
|
||||
define void @Test1() nounwind {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
|
@ -32,10 +32,10 @@ exit:
|
|||
|
||||
;///////////////////// TEST 2 //////////////////////////////
|
||||
|
||||
; This test shows that with minsize attribute, the loop is unrolled
|
||||
; according to the specified unroll factor.
|
||||
; This test shows that with optnone attribute, the loop is not unrolled
|
||||
; even if an unroll factor was specified.
|
||||
|
||||
define void @Test2() nounwind minsize {
|
||||
define void @Test2() nounwind optnone noinline {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
|
@ -52,45 +52,16 @@ exit:
|
|||
; CHECK_COUNT4-LABEL: @Test2
|
||||
; CHECK_COUNT4: phi
|
||||
; CHECK_COUNT4-NEXT: add
|
||||
; CHECK_COUNT4-NEXT: add
|
||||
; CHECK_COUNT4-NEXT: add
|
||||
; CHECK_COUNT4-NEXT: add
|
||||
; CHECK_COUNT4-NEXT: icmp
|
||||
|
||||
|
||||
;///////////////////// TEST 3 //////////////////////////////
|
||||
|
||||
; This test shows that with optnone attribute, the loop is not unrolled
|
||||
; even if an unroll factor was specified.
|
||||
|
||||
define void @Test3() nounwind optnone noinline {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
|
||||
%inc = add i32 %iv, 1
|
||||
%exitcnd = icmp uge i32 %inc, 1024
|
||||
br i1 %exitcnd, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK_COUNT4-LABEL: @Test3
|
||||
; CHECK_COUNT4: phi
|
||||
; CHECK_COUNT4-NEXT: add
|
||||
; CHECK_COUNT4-NEXT: icmp
|
||||
|
||||
|
||||
;///////////////////// TEST 4 //////////////////////////////
|
||||
|
||||
; This test shows that without any attribute, this loop is fully unrolled
|
||||
; by default.
|
||||
; This test shows that this loop is fully unrolled by default.
|
||||
|
||||
@tab = common global [24 x i32] zeroinitializer, align 4
|
||||
|
||||
define i32 @Test4() {
|
||||
define i32 @Test3() {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
|
@ -106,7 +77,7 @@ for.end: ; preds = %for.body
|
|||
ret i32 42
|
||||
}
|
||||
|
||||
; CHECK_NOCOUNT-LABEL: @Test4
|
||||
; CHECK_NOCOUNT-LABEL: @Test3
|
||||
; CHECK_NOCOUNT: store
|
||||
; CHECK_NOCOUNT-NEXT: store
|
||||
; CHECK_NOCOUNT-NEXT: store
|
||||
|
@ -134,12 +105,11 @@ for.end: ; preds = %for.body
|
|||
; CHECK_NOCOUNT-NEXT: ret
|
||||
|
||||
|
||||
;///////////////////// TEST 5 //////////////////////////////
|
||||
;///////////////////// TEST 4 //////////////////////////////
|
||||
|
||||
; This test shows that with optsize attribute, this loop is not unrolled
|
||||
; by default.
|
||||
; This test shows that with optsize attribute, this loop is not unrolled.
|
||||
|
||||
define i32 @Test5() optsize {
|
||||
define i32 @Test4() optsize {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
|
@ -155,6 +125,6 @@ for.end: ; preds = %for.body
|
|||
ret i32 42
|
||||
}
|
||||
|
||||
; CHECK_NOCOUNT-LABEL: @Test5
|
||||
; CHECK_NOCOUNT-LABEL: @Test4
|
||||
; CHECK_NOCOUNT: phi
|
||||
; CHECK_NOCOUNT: icmp
|
||||
|
|
|
@ -108,6 +108,29 @@ for.end: ; preds = %for.body
|
|||
!3 = !{!3, !4}
|
||||
!4 = !{!"llvm.loop.unroll.full"}
|
||||
|
||||
; #pragma clang loop unroll(full)
|
||||
; Loop should be fully unrolled, even for optsize.
|
||||
;
|
||||
; CHECK-LABEL: @loop64_with_full_optsize(
|
||||
; CHECK-NOT: br i1
|
||||
define void @loop64_with_full_optsize(i32* nocapture %a) optsize {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%inc = add nsw i32 %0, 1
|
||||
store i32 %inc, i32* %arrayidx, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 64
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
; #pragma clang loop unroll_count(4)
|
||||
; Loop should be unrolled 4 times.
|
||||
;
|
||||
|
|
Loading…
Reference in New Issue