forked from OSchip/llvm-project
Introduce -unroll-partial-threshold to separate PartialThreshold from Threshold in loop unroller.
Summary: Partial unrolling should have a separate threshold from full unrolling. Reviewers: efriedma, mzolotukhin Reviewed By: efriedma, mzolotukhin Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D28831 llvm-svn: 292293
This commit is contained in:
parent
8f4178a59e
commit
c3f87f02b1
|
@ -44,7 +44,11 @@ using namespace llvm;
|
|||
|
||||
static cl::opt<unsigned>
|
||||
UnrollThreshold("unroll-threshold", cl::Hidden,
|
||||
cl::desc("The baseline cost threshold for loop unrolling"));
|
||||
cl::desc("The cost threshold for loop unrolling"));
|
||||
|
||||
static cl::opt<unsigned> UnrollPartialThreshold(
|
||||
"unroll-partial-threshold", cl::Hidden,
|
||||
cl::desc("The cost threshold for partial loop unrolling"));
|
||||
|
||||
static cl::opt<unsigned> UnrollMaxPercentThresholdBoost(
|
||||
"unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
|
||||
|
@ -127,7 +131,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
|
|||
UP.Threshold = 150;
|
||||
UP.MaxPercentThresholdBoost = 400;
|
||||
UP.OptSizeThreshold = 0;
|
||||
UP.PartialThreshold = UP.Threshold;
|
||||
UP.PartialThreshold = 150;
|
||||
UP.PartialOptSizeThreshold = 0;
|
||||
UP.Count = 0;
|
||||
UP.PeelCount = 0;
|
||||
|
@ -153,10 +157,10 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
|
|||
}
|
||||
|
||||
// Apply any user values specified by cl::opt
|
||||
if (UnrollThreshold.getNumOccurrences() > 0) {
|
||||
if (UnrollThreshold.getNumOccurrences() > 0)
|
||||
UP.Threshold = UnrollThreshold;
|
||||
UP.PartialThreshold = UnrollThreshold;
|
||||
}
|
||||
if (UnrollPartialThreshold.getNumOccurrences() > 0)
|
||||
UP.PartialThreshold = UnrollPartialThreshold;
|
||||
if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
|
||||
UP.MaxPercentThresholdBoost = UnrollMaxPercentThresholdBoost;
|
||||
if (UnrollMaxCount.getNumOccurrences() > 0)
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -basicaa -loop-unroll -unroll-partial-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
|
||||
; The second check covers the use of alias analysis (with loop unrolling).
|
||||
|
||||
define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
|
||||
; RUN: opt < %s -dont-improve-non-negative-phi-bits=false -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
|
||||
; RUN: opt < %s -dont-improve-non-negative-phi-bits=false -basicaa -loop-unroll -unroll-threshold=45 -unroll-partial-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
|
||||
; The second check covers the use of alias analysis (with loop unrolling).
|
||||
|
||||
define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt < %s -S -unroll-threshold=20 -loop-unroll -unroll-allow-partial -unroll-runtime -unroll-allow-remainder -unroll-max-percent-threshold-boost=100 | FileCheck %s
|
||||
; RUN: opt < %s -S -unroll-partial-threshold=20 -unroll-threshold=20 -loop-unroll -unroll-allow-partial -unroll-runtime -unroll-allow-remainder -unroll-max-percent-threshold-boost=100 | FileCheck %s
|
||||
|
||||
; The Loop TripCount is 9. However unroll factors 3 or 9 exceed given threshold.
|
||||
; The test checks that we choose a smaller, power-of-two, unroll count and do not give up on unrolling.
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-runtime-epilog=true -unroll-count=8 | FileCheck %s -check-prefix=EPILOG
|
||||
; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
|
||||
; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=true -unroll-count=8 | FileCheck %s -check-prefix=EPILOG
|
||||
; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
|
||||
|
||||
; Choose a smaller, power-of-two, unroll count if the loop is too large.
|
||||
; This test makes sure we're not unrolling 'odd' counts
|
||||
|
|
Loading…
Reference in New Issue