2017-09-18 02:16:26 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
|
|
|
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
|
|
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
|
|
|
|
@a = common local_unnamed_addr global [1 x [10 x i32]] zeroinitializer, align 16
|
|
|
|
@c = common local_unnamed_addr global i32 0, align 4
|
|
|
|
@b = common local_unnamed_addr global [1 x [7 x i32]] zeroinitializer, align 16
|
|
|
|
|
|
|
|
; Function Attrs: norecurse nounwind uwtable
|
|
|
|
define void @fn1() local_unnamed_addr #0 {
|
|
|
|
; CHECK-LABEL: fn1:
|
|
|
|
; CHECK: # BB#0: # %entry
|
|
|
|
; CHECK-NEXT: movslq {{.*}}(%rip), %rax
|
|
|
|
; CHECK-NEXT: leaq (%rax,%rax,4), %rcx
|
|
|
|
; CHECK-NEXT: leaq (,%rax,4), %rdx
|
Revert r314886 "[X86] Improvement in CodeGen instruction selection for LEAs (re-applying post required revision changes.)"
It broke the Chromium / SQLite build; see PR34830.
> Summary:
> 1/ Operand folding during complex pattern matching for LEAs has been
> extended, such that it promotes Scale to accommodate similar operand
> appearing in the DAG.
> e.g.
> T1 = A + B
> T2 = T1 + 10
> T3 = T2 + A
> For above DAG rooted at T3, X86AddressMode will now look like
> Base = B , Index = A , Scale = 2 , Disp = 10
>
> 2/ During OptimizeLEAPass down the pipeline factorization is now performed over LEAs
> so that if there is an opportunity then complex LEAs (having 3 operands)
> could be factored out.
> e.g.
> leal 1(%rax,%rcx,1), %rdx
> leal 1(%rax,%rcx,2), %rcx
> will be factored as following
> leal 1(%rax,%rcx,1), %rdx
> leal (%rdx,%rcx) , %edx
>
> 3/ Aggressive operand folding for AM based selection for LEAs is sensitive to loops,
> thus avoiding creation of any complex LEAs within a loop.
>
> Reviewers: lsaba, RKSimon, craig.topper, qcolombet, jmolloy
>
> Reviewed By: lsaba
>
> Subscribers: jmolloy, spatel, igorb, llvm-commits
>
> Differential Revision: https://reviews.llvm.org/D35014
llvm-svn: 314919
2017-10-05 01:54:06 +08:00
|
|
|
; CHECK-NEXT: movl a(%rdx,%rcx,8), %ecx
|
|
|
|
; CHECK-NEXT: leaq (%rax,%rax,8), %rdx
|
|
|
|
; CHECK-NEXT: leaq (%rdx,%rdx,2), %rdx
|
|
|
|
; CHECK-NEXT: addq %rax, %rdx
|
|
|
|
; CHECK-NEXT: movl %ecx, b(%rdx,%rax,4)
|
2017-09-18 02:16:26 +08:00
|
|
|
; CHECK-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%0 = load i32, i32* @c, align 4, !tbaa !2
|
|
|
|
%idxprom = sext i32 %0 to i64
|
|
|
|
%arrayidx2 = getelementptr inbounds [1 x [10 x i32]], [1 x [10 x i32]]* @a, i64 0, i64 %idxprom, i64 %idxprom
|
|
|
|
%1 = load i32, i32* %arrayidx2, align 4, !tbaa !2
|
|
|
|
%arrayidx6 = getelementptr inbounds [1 x [7 x i32]], [1 x [7 x i32]]* @b, i64 0, i64 %idxprom, i64 %idxprom
|
|
|
|
store i32 %1, i32* %arrayidx6, align 4, !tbaa !2
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Function Attrs: norecurse nounwind uwtable
|
|
|
|
define i32 @main() local_unnamed_addr #0 {
|
|
|
|
; CHECK-LABEL: main:
|
|
|
|
; CHECK: # BB#0: # %entry
|
|
|
|
; CHECK-NEXT: movslq {{.*}}(%rip), %rax
|
|
|
|
; CHECK-NEXT: leaq (%rax,%rax,4), %rcx
|
|
|
|
; CHECK-NEXT: leaq (,%rax,4), %rdx
|
Revert r314886 "[X86] Improvement in CodeGen instruction selection for LEAs (re-applying post required revision changes.)"
It broke the Chromium / SQLite build; see PR34830.
> Summary:
> 1/ Operand folding during complex pattern matching for LEAs has been
> extended, such that it promotes Scale to accommodate similar operand
> appearing in the DAG.
> e.g.
> T1 = A + B
> T2 = T1 + 10
> T3 = T2 + A
> For above DAG rooted at T3, X86AddressMode will now look like
> Base = B , Index = A , Scale = 2 , Disp = 10
>
> 2/ During OptimizeLEAPass down the pipeline factorization is now performed over LEAs
> so that if there is an opportunity then complex LEAs (having 3 operands)
> could be factored out.
> e.g.
> leal 1(%rax,%rcx,1), %rdx
> leal 1(%rax,%rcx,2), %rcx
> will be factored as following
> leal 1(%rax,%rcx,1), %rdx
> leal (%rdx,%rcx) , %edx
>
> 3/ Aggressive operand folding for AM based selection for LEAs is sensitive to loops,
> thus avoiding creation of any complex LEAs within a loop.
>
> Reviewers: lsaba, RKSimon, craig.topper, qcolombet, jmolloy
>
> Reviewed By: lsaba
>
> Subscribers: jmolloy, spatel, igorb, llvm-commits
>
> Differential Revision: https://reviews.llvm.org/D35014
llvm-svn: 314919
2017-10-05 01:54:06 +08:00
|
|
|
; CHECK-NEXT: movl a(%rdx,%rcx,8), %ecx
|
|
|
|
; CHECK-NEXT: leaq (%rax,%rax,8), %rdx
|
|
|
|
; CHECK-NEXT: leaq (%rdx,%rdx,2), %rdx
|
|
|
|
; CHECK-NEXT: addq %rax, %rdx
|
|
|
|
; CHECK-NEXT: movl %ecx, b(%rdx,%rax,4)
|
2017-09-18 02:16:26 +08:00
|
|
|
; CHECK-NEXT: xorl %eax, %eax
|
|
|
|
; CHECK-NEXT: retq
|
|
|
|
entry:
|
|
|
|
%0 = load i32, i32* @c, align 4, !tbaa !2
|
|
|
|
%idxprom.i = sext i32 %0 to i64
|
|
|
|
%arrayidx2.i = getelementptr inbounds [1 x [10 x i32]], [1 x [10 x i32]]* @a, i64 0, i64 %idxprom.i, i64 %idxprom.i
|
|
|
|
%1 = load i32, i32* %arrayidx2.i, align 4, !tbaa !2
|
|
|
|
%arrayidx6.i = getelementptr inbounds [1 x [7 x i32]], [1 x [7 x i32]]* @b, i64 0, i64 %idxprom.i, i64 %idxprom.i
|
|
|
|
store i32 %1, i32* %arrayidx6.i, align 4, !tbaa !2
|
|
|
|
ret i32 0
|
|
|
|
}
|
|
|
|
|
|
|
|
attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
|
|
|
|
|
|
!llvm.module.flags = !{!0}
|
|
|
|
!llvm.ident = !{!1}
|
|
|
|
|
|
|
|
!0 = !{i32 1, !"wchar_size", i32 4}
|
|
|
|
!1 = !{!"clang version 6.0.0 "}
|
|
|
|
!2 = !{!3, !3, i64 0}
|
|
|
|
!3 = !{!"int", !4, i64 0}
|
|
|
|
!4 = !{!"omnipotent char", !5, i64 0}
|
|
|
|
!5 = !{!"Simple C/C++ TBAA"}
|