2016-08-01 13:56:57 +08:00
|
|
|
; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck <%t %s
|
Swap loop invariant GEP with loop variant GEP to allow more LICM.
This patch changes the order of the GEPs generated by the Splitting GEPs
pass. Specifically, when one of the GEPs has a constant offset and the base
is loop invariant, we generate the GEP with the constant first
when beneficial, to expose more cases for LICM.
If the Splitting GEPs pass originally generated the following:
do.body.i:
%idxprom.i = sext i32 %shr.i to i64
%2 = bitcast %typeD* %s to i8*
%3 = shl i64 %idxprom.i, 2
%uglygep = getelementptr i8, i8* %2, i64 %3
%uglygep7 = getelementptr i8, i8* %uglygep, i64 1032
...
Now it generates:
do.body.i:
%idxprom.i = sext i32 %shr.i to i64
%2 = bitcast %typeD* %s to i8*
%3 = shl i64 %idxprom.i, 2
%uglygep = getelementptr i8, i8* %2, i64 1032
%uglygep7 = getelementptr i8, i8* %uglygep, i64 %3
...
For no-loop cases, the original way of generating GEPs seems to
expose more CSE cases, so we don't change the logic for no-loop
cases, and only limit our change to the specific case we are
interested in.
llvm-svn: 248420
2015-09-24 03:25:30 +08:00
|
|
|
; REQUIRES: asserts
|
|
|
|
target triple = "aarch64--linux-android"
|
|
|
|
|
|
|
|
%typeD = type { i32, i32, [256 x i32], [257 x i32] }
|
|
|
|
|
|
|
|
; Function Attrs: noreturn nounwind uwtable
|
|
|
|
; test1: repeatedly halves the index interval (0, 256) over field 3 of %s (the
; [257 x i32] member), comparing each probed element against the value loaded
; from field 0. When the interval width reaches 1 the lower bound is stored to
; field 1 and the search restarts. No block contains a ret; every block ends in
; a branch, so the function loops forever.
;
; The FileCheck directives embedded in the body expect the constant byte offset
; of field 3 (1032 = 4 + 4 + 256*4, assuming 4-byte i32) to be applied once in
; the entry block, and only the variable index to be added inside do.body.i.
define i32 @test1(%typeD* nocapture %s) {
|
|
|
|
entry:
|
|
|
|
; Expected output for the entry block: the constant-offset GEP (+1032) appears
; here, ahead of the branch into the loop.
; CHECK-LABEL: entry:
|
|
|
|
; CHECK: %uglygep = getelementptr i8, i8* %0, i64 1032
|
|
|
|
; CHECK: br label %do.body.i
|
|
|
|
|
|
|
|
|
|
|
|
|
|
; Address of field 0 of %s; its value is loaded once below as %.pre.
%tPos = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 0
|
|
|
|
; Address of field 1 of %s; written in fooo.exit.
%k0 = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 1
|
|
|
|
; Comparison key, loaded before the loop and reused on every iteration.
%.pre = load i32, i32* %tPos, align 4
|
|
|
|
br label %do.body.i
|
|
|
|
|
|
|
|
do.body.i:
|
|
|
|
; Expected output for the loop body: only the variable-index GEP, based on the
; %uglygep computed in entry, immediately followed by the bitcast to i32*. The
; constant 1032 must not be re-applied to %uglygep inside the loop.
; CHECK-LABEL: do.body.i:
|
|
|
|
; CHECK: %uglygep2 = getelementptr i8, i8* %uglygep, i64 %3
|
|
|
|
; CHECK-NEXT: %4 = bitcast i8* %uglygep2 to i32*
|
|
|
|
; CHECK-NOT: %uglygep2 = getelementptr i8, i8* %uglygep, i64 1032
|
|
|
|
|
|
|
|
|
|
|
|
|
|
; %0 is the upper bound of the interval (256 on entry).
%0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ]
|
|
|
|
; %1 is the lower bound of the interval (0 on entry).
%1 = phi i32 [ 0, %entry ], [ %.be6, %do.body.i.backedge ]
|
|
|
|
; Midpoint: (%1 + %0) >> 1.
%add.i = add nsw i32 %1, %0
|
|
|
|
%shr.i = ashr i32 %add.i, 1
|
|
|
|
%idxprom.i = sext i32 %shr.i to i64
|
|
|
|
; Loop-variant access into field 3; this is the GEP whose constant and variable
; parts the FileCheck directives above expect to be split across entry/loop.
%arrayidx.i = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 3, i64 %idxprom.i
|
|
|
|
%2 = load i32, i32* %arrayidx.i, align 4
|
|
|
|
; If element <= key, the midpoint becomes the new lower bound; otherwise it
; becomes the new upper bound.
%cmp.i = icmp sle i32 %2, %.pre
|
|
|
|
%na.1.i = select i1 %cmp.i, i32 %0, i32 %shr.i
|
|
|
|
%nb.1.i = select i1 %cmp.i, i32 %shr.i, i32 %1
|
|
|
|
; Leave the halving step once the interval width is exactly 1.
%sub.i = sub nsw i32 %na.1.i, %nb.1.i
|
|
|
|
%cmp1.i = icmp eq i32 %sub.i, 1
|
|
|
|
br i1 %cmp1.i, label %fooo.exit, label %do.body.i.backedge
|
|
|
|
|
|
|
|
do.body.i.backedge:
|
|
|
|
; Next bounds: the narrowed interval when coming from do.body.i, or the initial
; 256 / 0 interval when coming from fooo.exit (search restart).
%.be = phi i32 [ %na.1.i, %do.body.i ], [ 256, %fooo.exit ]
|
|
|
|
%.be6 = phi i32 [ %nb.1.i, %do.body.i ], [ 0, %fooo.exit ]
|
|
|
|
br label %do.body.i
|
|
|
|
|
|
|
|
fooo.exit: ; preds = %do.body.i
|
|
|
|
; Record the final lower bound in field 1, then go around the loop again.
store i32 %nb.1.i, i32* %k0, align 4
|
|
|
|
br label %do.body.i.backedge
|
|
|
|
}
|
|
|
|
|