forked from OSchip/llvm-project
Change max interleave factor to 12 for POWER7 and POWER8.
llvm-svn: 228973
This commit is contained in:
parent
b4a0df9a4a
commit
05e69157b6
|
@ -226,6 +226,12 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor() {
|
|||
if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
|
||||
return 1;
|
||||
|
||||
// For P7 and P8, floating-point instructions have a 6-cycle latency and
|
||||
// there are two execution units, so unroll by 12x for latency hiding.
|
||||
if (Directive == PPC::DIR_PWR7 ||
|
||||
Directive == PPC::DIR_PWR8)
|
||||
return 12;
|
||||
|
||||
// For most things, modern systems have two execution units (and
|
||||
// out-of-order execution).
|
||||
return 2;
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
; Run the loop vectorizer and match its debug trace. The -debug flag only
; produces output in builds with assertions enabled, hence the REQUIRES line.
; RUN: opt < %s -loop-vectorize -S -debug 2>&1 | FileCheck %s
; REQUIRES: asserts
|
||||
|
||||
; CHECK: LV: Unroll Factor is 12
|
||||
|
||||
target datalayout = "e-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64le-ibm-linux-gnu"
|
||||
|
||||
; Sum-reduction loop over the doubles in %arr, used as input for the loop
; vectorizer. The loop body is a single load plus a fast-math fadd feeding a
; reduction phi. The function returns void, so the accumulated sum is never
; consumed by a caller.
; NOTE(review): attribute group #0 is referenced here but its definition is not
; visible in this view -- confirm it exists in the full test file.
define void @test(double* nocapture readonly %arr, i32 signext %len) #0 {
|
||||
entry:
|
||||
; Skip the loop entirely unless %len is strictly positive (signed compare).
%cmp4 = icmp sgt i32 %len, 0
|
||||
br i1 %cmp4, label %for.body.lr.ph, label %for.end
|
||||
|
||||
for.body.lr.ph: ; preds = %entry
|
||||
; %0 = %len - 1: the index value at which the loop exits.
%0 = add i32 %len, -1
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %for.body.lr.ph
|
||||
; 64-bit induction variable, starting at 0 and incremented by 1 per iteration.
%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
|
||||
; Running sum, starting at 0.0.
%redx.05 = phi double [ 0.000000e+00, %for.body.lr.ph ], [ %add, %for.body ]
|
||||
%arrayidx = getelementptr inbounds double* %arr, i64 %indvars.iv
|
||||
%1 = load double* %arrayidx, align 8
|
||||
; Accumulate the loaded element into the running sum with fast-math flags.
%add = fadd fast double %1, %redx.05
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
; The exit test uses the pre-increment index truncated to i32, so the loop
; leaves after the iteration whose index equals %len - 1.
%lftr.wideiv = trunc i64 %indvars.iv to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %0
|
||||
br i1 %exitcond, label %for.end.loopexit, label %for.body
|
||||
|
||||
for.end.loopexit: ; preds = %for.body
|
||||
; Single-entry phi carrying the final sum out of the loop.
%add.lcssa = phi double [ %add, %for.body ]
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.end.loopexit, %entry
|
||||
; Merges 0.0 (loop skipped) with the final sum (loop executed); the value is
; not used afterwards.
%redx.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %for.end.loopexit ]
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue