2020-11-19 04:54:54 +08:00
|
|
|
; RUN: opt < %s -S -passes="default<O2>" -unroll-runtime=true -enable-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=O2
|
|
|
|
; RUN: opt < %s -S -passes="default<O3>" -unroll-runtime=true -enable-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=O3
|
|
|
|
; RUN: opt < %s -S -passes="default<Os>" -unroll-runtime=true -enable-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=Os
|
|
|
|
; RUN: opt < %s -S -passes="default<Oz>" -unroll-runtime=true -enable-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=Oz
|
[llvm] Make new pass manager's OptimizationLevel a class
Summary:
The old pass manager separated speed optimization and size optimization
levels into two unsigned values. Coalescing both in an enum in the new
pass manager may lead to unintentional casts and comparisons.
In particular, taking a look at how the loop unroll passes were constructed
previously, the Os/Oz are now (==new pass manager) treated just like O3,
likely unintentionally.
This change disallows raw comparisons between optimization levels, to
avoid such unintended effects. As a result, the O{s|z} behavior changes
for loop unrolling and loop unroll and jam, matching O2 rather than O3.
The change also parameterizes the threshold values used for loop
unrolling, primarily to aid testing.
Reviewers: tejohnson, davidxl
Reviewed By: tejohnson
Subscribers: zzheng, ychen, mehdi_amini, hiraditya, steven_wu, dexonsmith, dang, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D72547
2020-01-17 00:51:50 +08:00
|
|
|
|
|
|
|
; Check that Os and Oz are optimized like O2, not like O3. To easily highlight
|
|
|
|
; the behavior, we artificially disable unrolling for anything but O3 by setting
|
|
|
|
; the default threshold to 0.
|
|
|
|
|
|
|
|
; O3: for.inner.1
|
|
|
|
; O2-NOT: for.inner.1
|
|
|
|
; Os-NOT: for.inner.1
|
|
|
|
; Oz-NOT: for.inner.1
|
|
|
|
|
|
|
|
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
|
|
|
|
|
|
|
|
; @test1: a two-level loop nest. For each %i in [0, %I) the inner loop sums
; the %J i32 values B[0] .. B[%J - 1] and the latch stores that sum to A[%i].
; The FileCheck directives in this file look for the block name "for.inner.1",
; so the label "for.inner" below must keep its name.
define void @test1(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
|
|
|
|
entry:
|
|
|
|
; Guard: the loop nest is entered only when both trip counts are non-zero.
%cmp = icmp ne i32 %J, 0
|
|
|
|
; NOTE(review): despite its name, %cmpJ tests %I (and %cmp above tests %J).
%cmpJ = icmp ne i32 %I, 0
|
|
|
|
%or.cond = and i1 %cmp, %cmpJ
|
|
|
|
br i1 %or.cond, label %for.outer.preheader, label %for.end
|
|
|
|
|
|
|
|
for.outer.preheader:
|
|
|
|
br label %for.outer
|
|
|
|
|
|
|
|
for.outer:
|
|
|
|
; Outer induction variable: starts at 0, advanced by %add8 in for.latch.
%i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]
|
|
|
|
br label %for.inner
|
|
|
|
|
|
|
|
for.inner:
|
|
|
|
; Inner induction variable and running sum; both restart at 0 for every
; outer iteration.
%j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
|
|
|
|
%sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
|
|
|
|
%arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
|
|
|
|
%0 = load i32, i32* %arrayidx, align 4, !tbaa !5
|
|
|
|
; sum += B[j]
%add = add i32 %0, %sum
|
|
|
|
%inc = add nuw i32 %j, 1
|
|
|
|
; Leave the inner loop once %j + 1 reaches %J.
%exitcond = icmp eq i32 %inc, %J
|
|
|
|
br i1 %exitcond, label %for.latch, label %for.inner
|
|
|
|
|
|
|
|
for.latch:
|
|
|
|
; Single-entry phi carrying the final inner-loop sum out of for.inner.
%add.lcssa = phi i32 [ %add, %for.inner ]
|
|
|
|
%arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
|
|
|
|
; A[i] = sum
store i32 %add.lcssa, i32* %arrayidx6, align 4, !tbaa !5
|
|
|
|
%add8 = add nuw i32 %i, 1
|
|
|
|
; Leave the outer loop once %i + 1 reaches %I.
%exitcond25 = icmp eq i32 %add8, %I
|
|
|
|
br i1 %exitcond25, label %for.end.loopexit, label %for.outer
|
|
|
|
|
|
|
|
for.end.loopexit:
|
|
|
|
br label %for.end
|
|
|
|
|
|
|
|
for.end:
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
!5 = !{!6, !6, i64 0}
|
|
|
|
!6 = !{!"int", !7, i64 0}
|
|
|
|
!7 = !{!"omnipotent char", !8, i64 0}
|
|
|
|
!8 = !{!"Simple C/C++ TBAA"}
|