forked from OSchip/llvm-project
Implement TTI getUnrollingPreferences for PowerPC
The PowerPC A2 core greatly benefits from aggressive concatenation unrolling; use the new getUnrollingPreferences to enable this by default when targeting the PPC A2 core. llvm-svn: 190549
This commit is contained in:
parent
3c2dacaf88
commit
71780ec4fd
|
@ -77,6 +77,7 @@ public:
|
|||
/// \name Scalar TTI Implementations
|
||||
/// @{
|
||||
virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
|
||||
/// Fill in \p UP with this target's unrolling preferences for loop \p L.
virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
|
||||
|
||||
/// @}
|
||||
|
||||
|
@ -129,6 +130,14 @@ PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
|
|||
return PSK_Software;
|
||||
}
|
||||
|
||||
/// Populate \p UP with the loop-unrolling preferences for the current
/// subtarget. Only the A2 core changes anything here; for every other CPU
/// directive \p UP is left exactly as the caller passed it in.
void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
  // Nothing to tune unless we are compiling for the A2.
  if (ST->getDarwinDirective() != PPC::DIR_A2)
    return;

  // The A2 is an in-order core with a deep pipeline; concatenation unrolling
  // helps expose latency-hiding opportunities to the instruction scheduler,
  // so request both runtime and partial unrolling.
  UP.Runtime = true;
  UP.Partial = true;
}
|
||||
|
||||
unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
|
||||
if (Vector && !ST->hasAltivec())
|
||||
return 0;
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s
|
||||
; Counted loop with a constant bound: %iv starts at 0 and the loop exits once
; the incremented value reaches 1024. The function is marked optsize. The
; FileCheck patterns that follow this function expect three add instructions
; on consecutive lines before the icmp, i.e. an unrolled loop body.
define void @unroll_opt_for_size() nounwind optsize {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
|
||||
%inc = add i32 %iv, 1
|
||||
%exitcnd = icmp uge i32 %inc, 1024
|
||||
br i1 %exitcnd, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
ret void
|
||||
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @unroll_opt_for_size
|
||||
; CHECK: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK-NEXT: add
|
||||
; CHECK: icmp
|
||||
|
||||
; Sums i32 elements loaded from %a, one per iteration, and returns the total;
; returns 0 without entering the loop when %n is 0. The exit test compares the
; truncated induction variable against %n, so the trip count is a run-time
; value. The FileCheck patterns that follow this function look for the
; unr.cmp / for.body.unr blocks and a branch on %exitcond.7 in the main loop.
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
|
||||
entry:
|
||||
%cmp1 = icmp eq i32 %n, 0
|
||||
br i1 %cmp1, label %for.end, label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
|
||||
%sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
|
||||
%arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
|
||||
%0 = load i32* %arrayidx, align 4
|
||||
%add = add nsw i32 %0, %sum.02
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %n
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
%sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
|
||||
ret i32 %sum.0.lcssa
|
||||
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @test
|
||||
; CHECK: unr.cmp{{.*}}:
|
||||
; CHECK: for.body.unr{{.*}}:
|
||||
; CHECK: for.body:
|
||||
; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
# Mark the tests governed by this config as unsupported unless the PowerPC
# target is among the targets being built.
targets = set(config.root.targets_to_build.split())
if 'PowerPC' not in targets:
    config.unsupported = True
|
||||
|
Loading…
Reference in New Issue