[x86] auto-generate complete checks; NFC

llvm-svn: 323571
Sanjay Patel 2018-01-26 22:06:07 +00:00
parent e48597a50e
commit 5bce08ddff
3 changed files with 443 additions and 93 deletions


@@ -1,27 +1,70 @@
; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s -check-prefix=X64
; RUN: llc < %s -O3 -march=x86 -mcpu=core2 | FileCheck %s -check-prefix=X32
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O3 -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s -check-prefix=X64
; RUN: llc < %s -O3 -mtriple=i686-unknown-unknown -mcpu=core2 | FileCheck %s -check-prefix=X32
; @simple is the most basic chain of address induction variables. Chaining
; saves at least one register and avoids complex addressing and setup
; code.
;
; X64: @simple
; %x * 4
; X64: shlq $2
; no other address computation in the preheader
; X64-NEXT: xorl
; X64-NEXT: .p2align
; X64: %loop
; no complex address modes
; X64-NOT: (%{{[^)]+}},%{{[^)]+}},
;
; X32: @simple
; no expensive address computation in the preheader
; X32-NOT: imul
; X32: %loop
; no complex address modes
; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
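; A rough C-level picture of @simple (an illustrative sketch added for this
; note, not taken from the original source): one pointer IV stepped by 4*x.
;
;   int simple(int *a, int *b, int x) {
;     int s = 0;
;     do {
;       s += a[0] + a[x] + a[2 * x] + a[3 * x];  /* four chained addresses */
;       a += 4 * x;
;     } while (a != b);
;     return s;
;   }
;
; With chaining, the a+x, a+2*x, a+3*x addresses are formed by repeatedly
; adding one register rather than by scaled-index address computations.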
define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
; X64-LABEL: simple:
; X64: # %bb.0: # %entry
; X64-NEXT: movslq %edx, %rcx
; X64-NEXT: shlq $2, %rcx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB0_1: # %loop
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: addl (%rdi), %eax
; X64-NEXT: leaq (%rdi,%rcx), %r8
; X64-NEXT: addl (%rdi,%rcx), %eax
; X64-NEXT: leaq (%r8,%rcx), %rdx
; X64-NEXT: addl (%rcx,%r8), %eax
; X64-NEXT: addl (%rcx,%rdx), %eax
; X64-NEXT: addq %rcx, %rdx
; X64-NEXT: addq %rcx, %rdx
; X64-NEXT: movq %rdx, %rdi
; X64-NEXT: cmpq %rsi, %rdx
; X64-NEXT: jne .LBB0_1
; X64-NEXT: # %bb.2: # %exit
; X64-NEXT: retq
;
; X32-LABEL: simple:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: shll $2, %edx
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB0_1: # %loop
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: addl (%esi), %eax
; X32-NEXT: leal (%esi,%edx), %edi
; X32-NEXT: addl (%esi,%edx), %eax
; X32-NEXT: leal (%edi,%edx), %ebx
; X32-NEXT: addl (%edx,%edi), %eax
; X32-NEXT: addl (%edx,%ebx), %eax
; X32-NEXT: addl %edx, %ebx
; X32-NEXT: addl %edx, %ebx
; X32-NEXT: movl %ebx, %esi
; X32-NEXT: cmpl %ecx, %ebx
; X32-NEXT: jne .LBB0_1
; X32-NEXT: # %bb.2: # %exit
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
entry:
br label %loop
loop:
@@ -47,23 +90,60 @@ exit:
; @user is not currently chained because the IV is live across memory ops.
;
; X64: @user
; X64: shlq $4
; X64: lea
; X64: lea
; X64: %loop
; complex address modes
; X64: (%{{[^)]+}},%{{[^)]+}},
;
; X32: @user
; expensive address computation in the preheader
; X32: shll $4
; X32: lea
; X32: lea
; X32: %loop
; complex address modes
; X32: (%{{[^)]+}},%{{[^)]+}},
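; Illustrative sketch (same caveats as above): @user is the @simple loop plus
; a store of the running sum through the current IV value, so the IV is live
; across a memory op and no chain is formed:
;
;   do {
;     s += a[0] + a[x] + a[2 * x] + a[3 * x];
;     a[0] = s;       /* extra IV user */
;     a += 4 * x;
;   } while (a != b);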
define i32 @user(i32* %a, i32* %b, i32 %x) nounwind {
; X64-LABEL: user:
; X64: # %bb.0: # %entry
; X64-NEXT: movslq %edx, %rcx
; X64-NEXT: movq %rcx, %rdx
; X64-NEXT: shlq $4, %rdx
; X64-NEXT: leaq (,%rcx,4), %rax
; X64-NEXT: leaq (%rax,%rax,2), %r8
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB1_1: # %loop
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: addl (%rdi), %eax
; X64-NEXT: addl (%rdi,%rcx,4), %eax
; X64-NEXT: addl (%rdi,%rcx,8), %eax
; X64-NEXT: addl (%rdi,%r8), %eax
; X64-NEXT: movl %eax, (%rdi)
; X64-NEXT: addq %rdx, %rdi
; X64-NEXT: cmpq %rdi, %rsi
; X64-NEXT: jne .LBB1_1
; X64-NEXT: # %bb.2: # %exit
; X64-NEXT: retq
;
; X32-LABEL: user:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl %ecx, %edi
; X32-NEXT: shll $4, %edi
; X32-NEXT: leal (,%ecx,4), %eax
; X32-NEXT: leal (%eax,%eax,2), %ebx
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB1_1: # %loop
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: addl (%esi), %eax
; X32-NEXT: addl (%esi,%ecx,4), %eax
; X32-NEXT: addl (%esi,%ecx,8), %eax
; X32-NEXT: addl (%esi,%ebx), %eax
; X32-NEXT: movl %eax, (%esi)
; X32-NEXT: addl %edi, %esi
; X32-NEXT: cmpl %esi, %edx
; X32-NEXT: jne .LBB1_1
; X32-NEXT: # %bb.2: # %exit
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
entry:
br label %loop
loop:
@@ -93,20 +173,94 @@ exit:
; used to do, and exactly what we don't want to do. LSR's new IV
; chaining feature should now undo the damage.
;
; X64: extrastride:
; We currently don't handle this on X64 because the sexts cause
; strange increment expressions like this:
; IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
;
; X32: extrastride:
; no spills in the preheader
; X32-NOT: mov{{.*}}(%esp){{$}}
; X32: %for.body{{$}}
; no complex address modes
; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
; no reloads
; X32-NOT: (%esp)
; For x32, no spills in the preheader, no complex address modes, no reloads.
define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
; X64-LABEL: extrastride:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbp
; X64-NEXT: pushq %r14
; X64-NEXT: pushq %rbx
; X64-NEXT: # kill: def %ecx killed %ecx def %rcx
; X64-NEXT: # kill: def %esi killed %esi def %rsi
; X64-NEXT: testl %r9d, %r9d
; X64-NEXT: je .LBB2_3
; X64-NEXT: # %bb.1: # %for.body.lr.ph
; X64-NEXT: leal (%rsi,%rsi), %r14d
; X64-NEXT: leal (%rsi,%rsi,2), %ebx
; X64-NEXT: addl %esi, %ecx
; X64-NEXT: leal (,%rsi,4), %eax
; X64-NEXT: leal (%rcx,%rsi,4), %ebp
; X64-NEXT: movslq %eax, %r10
; X64-NEXT: movslq %ebx, %r11
; X64-NEXT: movslq %r14d, %rbx
; X64-NEXT: movslq %esi, %rsi
; X64-NEXT: movslq %r8d, %rcx
; X64-NEXT: shlq $2, %rcx
; X64-NEXT: movslq %ebp, %rax
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB2_2: # %for.body
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl (%rdi,%rsi), %ebp
; X64-NEXT: addl (%rdi), %ebp
; X64-NEXT: addl (%rdi,%rbx), %ebp
; X64-NEXT: addl (%rdi,%r11), %ebp
; X64-NEXT: addl (%rdi,%r10), %ebp
; X64-NEXT: movl %ebp, (%rdx)
; X64-NEXT: addq %rax, %rdi
; X64-NEXT: addq %rcx, %rdx
; X64-NEXT: decl %r9d
; X64-NEXT: jne .LBB2_2
; X64-NEXT: .LBB2_3: # %for.end
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r14
; X64-NEXT: popq %rbp
; X64-NEXT: retq
;
; X32-LABEL: extrastride:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: testl %eax, %eax
; X32-NEXT: je .LBB2_3
; X32-NEXT: # %bb.1: # %for.body.lr.ph
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: addl %esi, %edi
; X32-NEXT: shll $2, %ecx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB2_2: # %for.body
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl (%ebx,%esi), %ebp
; X32-NEXT: addl (%ebx), %ebp
; X32-NEXT: leal (%ebx,%esi), %ebx
; X32-NEXT: addl (%esi,%ebx), %ebp
; X32-NEXT: leal (%ebx,%esi), %ebx
; X32-NEXT: addl (%esi,%ebx), %ebp
; X32-NEXT: leal (%ebx,%esi), %ebx
; X32-NEXT: addl (%esi,%ebx), %ebp
; X32-NEXT: movl %ebp, (%edx)
; X32-NEXT: leal (%ebx,%esi), %ebx
; X32-NEXT: addl %edi, %ebx
; X32-NEXT: addl %ecx, %edx
; X32-NEXT: decl %eax
; X32-NEXT: jne .LBB2_2
; X32-NEXT: .LBB2_3: # %for.end
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: retl
entry:
%cmp8 = icmp eq i32 %z, 0
br i1 %cmp8, label %for.end, label %for.body.lr.ph
@@ -158,13 +312,71 @@ for.end: ; preds = %for.body, %entry
; }
; where 's' can be folded into the addressing mode.
; Consequently, we should *not* form any chains.
;
; X64: foldedidx:
; X64: movzbl -3(
;
; X32: foldedidx:
; X32: movzbl 400(
define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
; X64-LABEL: foldedidx:
; X64: # %bb.0: # %entry
; X64-NEXT: movl $3, %eax
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB3_1: # %for.body
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movzbl -3(%rdi,%rax), %r8d
; X64-NEXT: movzbl -3(%rsi,%rax), %ecx
; X64-NEXT: addl %r8d, %ecx
; X64-NEXT: movb %cl, -3(%rdx,%rax)
; X64-NEXT: movzbl -2(%rdi,%rax), %r8d
; X64-NEXT: movzbl -2(%rsi,%rax), %ecx
; X64-NEXT: addl %r8d, %ecx
; X64-NEXT: movb %cl, -2(%rdx,%rax)
; X64-NEXT: movzbl -1(%rdi,%rax), %r8d
; X64-NEXT: movzbl -1(%rsi,%rax), %ecx
; X64-NEXT: addl %r8d, %ecx
; X64-NEXT: movb %cl, -1(%rdx,%rax)
; X64-NEXT: movzbl (%rdi,%rax), %r8d
; X64-NEXT: movzbl (%rsi,%rax), %ecx
; X64-NEXT: addl %r8d, %ecx
; X64-NEXT: movb %cl, (%rdx,%rax)
; X64-NEXT: addq $4, %rax
; X64-NEXT: cmpl $403, %eax # imm = 0x193
; X64-NEXT: jne .LBB3_1
; X64-NEXT: # %bb.2: # %for.end
; X64-NEXT: retq
;
; X32-LABEL: foldedidx:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: movl $-400, %eax # imm = 0xFE70
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB3_1: # %for.body
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movzbl 400(%esi,%eax), %edi
; X32-NEXT: movzbl 400(%edx,%eax), %ebx
; X32-NEXT: addl %edi, %ebx
; X32-NEXT: movb %bl, 400(%ecx,%eax)
; X32-NEXT: movzbl 401(%esi,%eax), %edi
; X32-NEXT: movzbl 401(%edx,%eax), %ebx
; X32-NEXT: addl %edi, %ebx
; X32-NEXT: movb %bl, 401(%ecx,%eax)
; X32-NEXT: movzbl 402(%esi,%eax), %edi
; X32-NEXT: movzbl 402(%edx,%eax), %ebx
; X32-NEXT: addl %edi, %ebx
; X32-NEXT: movb %bl, 402(%ecx,%eax)
; X32-NEXT: movzbl 403(%esi,%eax), %edi
; X32-NEXT: movzbl 403(%edx,%eax), %ebx
; X32-NEXT: addl %edi, %ebx
; X32-NEXT: movb %bl, 403(%ecx,%eax)
; X32-NEXT: addl $4, %eax
; X32-NEXT: jne .LBB3_1
; X32-NEXT: # %bb.2: # %for.end
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
entry:
br label %for.body
@@ -223,27 +435,49 @@ for.end: ; preds = %for.body
; @multioper tests instructions with multiple IV user operands. We
; should be able to chain them independent of each other.
;
; X64: @multioper
; X64: %for.body
; X64: movl %{{.*}},4)
; X64-NEXT: leal 1(
; X64-NEXT: movl %{{.*}},4)
; X64-NEXT: leal 2(
; X64-NEXT: movl %{{.*}},4)
; X64-NEXT: leal 3(
; X64-NEXT: movl %{{.*}},4)
;
; X32: @multioper
; X32: %for.body
; X32: movl %{{.*}},4)
; X32-NEXT: leal 1(
; X32-NEXT: movl %{{.*}},4)
; X32-NEXT: leal 2(
; X32-NEXT: movl %{{.*}},4)
; X32-NEXT: leal 3(
; X32-NEXT: movl %{{.*}},4)
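; Illustrative sketch of @multioper (not from the original source): the same
; induction variable feeds both the address and the stored value of four
; independent stores, which can be chained independently:
;
;   do {
;     a[i] = i;  a[i + 1] = i + 1;  a[i + 2] = i + 2;  a[i + 3] = i + 3;
;     i += 4;
;   } while (i < n);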
define void @multioper(i32* %a, i32 %n) nounwind {
; X64-LABEL: multioper:
; X64: # %bb.0: # %entry
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB4_1: # %for.body
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl %eax, (%rdi,%rax,4)
; X64-NEXT: leal 1(%rax), %ecx
; X64-NEXT: movl %ecx, 4(%rdi,%rax,4)
; X64-NEXT: leal 2(%rax), %ecx
; X64-NEXT: movl %ecx, 8(%rdi,%rax,4)
; X64-NEXT: leal 3(%rax), %ecx
; X64-NEXT: movl %ecx, 12(%rdi,%rax,4)
; X64-NEXT: addq $4, %rax
; X64-NEXT: cmpl %esi, %eax
; X64-NEXT: jl .LBB4_1
; X64-NEXT: # %bb.2: # %exit
; X64-NEXT: retq
;
; X32-LABEL: multioper:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %esi
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB4_1: # %for.body
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl %eax, (%edx,%eax,4)
; X32-NEXT: leal 1(%eax), %esi
; X32-NEXT: movl %esi, 4(%edx,%eax,4)
; X32-NEXT: leal 2(%eax), %esi
; X32-NEXT: movl %esi, 8(%edx,%eax,4)
; X32-NEXT: leal 3(%eax), %esi
; X32-NEXT: movl %esi, 12(%edx,%eax,4)
; X32-NEXT: addl $4, %eax
; X32-NEXT: cmpl %ecx, %eax
; X32-NEXT: jl .LBB4_1
; X32-NEXT: # %bb.2: # %exit
; X32-NEXT: popl %esi
; X32-NEXT: retl
entry:
br label %for.body
@@ -272,12 +506,51 @@ exit:
; @testCmpZero has an ICmpZero LSR use that should not be hidden from
; LSR. Profitable chains should have more than one nonzero increment
; anyway.
;
; X32: @testCmpZero
; X32: %for.body82.us
; X32: cmp
; X32: jne
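; Roughly what the generated loop does (a sketch inferred from the checks in
; this test, not from the original source; the trip count is derived from %len):
;
;   for (long i = 0; i != count; ++i)
;     dest0[i] = source0[4 * i];
;
; The loop-exit compare is the LSR use the comment above refers to.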
define void @testCmpZero(i8* %src, i8* %dst, i32 %srcidx, i32 %dstidx, i32 %len) nounwind ssp {
; X64-LABEL: testCmpZero:
; X64: # %bb.0: # %entry
; X64-NEXT: movslq %edx, %rdx
; X64-NEXT: addq %rdx, %rdi
; X64-NEXT: movslq %ecx, %r9
; X64-NEXT: addq %rsi, %r9
; X64-NEXT: addl %edx, %r8d
; X64-NEXT: movslq %r8d, %rcx
; X64-NEXT: subq %rdx, %rcx
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB5_1: # %for.body82.us
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movzbl (%r9,%rdx,4), %eax
; X64-NEXT: movb %al, (%rdi,%rdx)
; X64-NEXT: incq %rdx
; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB5_1
; X64-NEXT: # %bb.2: # %return
; X64-NEXT: retq
;
; X32-LABEL: testCmpZero:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: addl {{[0-9]+}}(%esp), %edx
; X32-NEXT: xorl %esi, %esi
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB5_1: # %for.body82.us
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movzbl (%edx,%esi,4), %ebx
; X32-NEXT: movb %bl, (%ecx,%esi)
; X32-NEXT: incl %esi
; X32-NEXT: cmpl %esi, %eax
; X32-NEXT: jne .LBB5_1
; X32-NEXT: # %bb.2: # %return
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
entry:
%dest0 = getelementptr inbounds i8, i8* %src, i32 %srcidx
%source0 = getelementptr inbounds i8, i8* %dst, i32 %dstidx


@@ -1,13 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
; RUN: llc < %s -O2 -march=x86-64 -lsr-insns-cost -asm-verbose=0 | FileCheck %s
; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -lsr-insns-cost | FileCheck %s
; The OPT runs check that LSR optimizes the compare against a static counter into a compare with 0.
; BOTH: for.body:
; INSN: icmp eq i64 %lsr.iv.next, 0
; REGS: icmp eq i64 %indvars.iv.next, 1024
; The LLC run checks that LSR optimizes the compare for a static counter.
; That means that instead of creating the following:
; movl %ecx, (%rdx,%rax,4)
@@ -20,17 +18,67 @@
; movl %ecx, 4096(%rdx,%rax,4)
; incq %rax
; CHECK: LBB0_1:
; CHECK-NEXT: movl 4096(%{{.+}},[[REG:%[0-9a-z]+]]
; CHECK-NEXT: addl 4096(%{{.+}},[[REG]]
; CHECK-NEXT: movl %{{.+}}, 4096(%{{.+}},[[REG]]
; CHECK-NOT: cmp
; CHECK: jne
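; The loop being compiled is essentially (illustrative sketch only):
;
;   void foo(const int *x, const int *y, int *q) {
;     for (long i = 0; i < 1024; ++i)
;       q[i] = x[i] + y[i];
;   }
;
; In the generated code the counter runs from -4096 up to 0 in steps of 4, so
; the exit test reuses the flags from the final add and no cmp is needed.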
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: norecurse nounwind uwtable
define void @foo(i32* nocapture readonly %x, i32* nocapture readonly %y, i32* nocapture %q) {
; INSN-LABEL: @foo(
; INSN-NEXT: entry:
; INSN-NEXT: [[Q1:%.*]] = bitcast i32* [[Q:%.*]] to i8*
; INSN-NEXT: [[Y3:%.*]] = bitcast i32* [[Y:%.*]] to i8*
; INSN-NEXT: [[X7:%.*]] = bitcast i32* [[X:%.*]] to i8*
; INSN-NEXT: br label [[FOR_BODY:%.*]]
; INSN: for.cond.cleanup:
; INSN-NEXT: ret void
; INSN: for.body:
; INSN-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ -4096, [[ENTRY:%.*]] ]
; INSN-NEXT: [[UGLYGEP8:%.*]] = getelementptr i8, i8* [[X7]], i64 [[LSR_IV]]
; INSN-NEXT: [[UGLYGEP89:%.*]] = bitcast i8* [[UGLYGEP8]] to i32*
; INSN-NEXT: [[SCEVGEP10:%.*]] = getelementptr i32, i32* [[UGLYGEP89]], i64 1024
; INSN-NEXT: [[TMP:%.*]] = load i32, i32* [[SCEVGEP10]], align 4
; INSN-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, i8* [[Y3]], i64 [[LSR_IV]]
; INSN-NEXT: [[UGLYGEP45:%.*]] = bitcast i8* [[UGLYGEP4]] to i32*
; INSN-NEXT: [[SCEVGEP6:%.*]] = getelementptr i32, i32* [[UGLYGEP45]], i64 1024
; INSN-NEXT: [[TMP1:%.*]] = load i32, i32* [[SCEVGEP6]], align 4
; INSN-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP]]
; INSN-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Q1]], i64 [[LSR_IV]]
; INSN-NEXT: [[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to i32*
; INSN-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[UGLYGEP2]], i64 1024
; INSN-NEXT: store i32 [[ADD]], i32* [[SCEVGEP]], align 4
; INSN-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 4
; INSN-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; INSN-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
;
; REGS-LABEL: @foo(
; REGS-NEXT: entry:
; REGS-NEXT: br label [[FOR_BODY:%.*]]
; REGS: for.cond.cleanup:
; REGS-NEXT: ret void
; REGS: for.body:
; REGS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; REGS-NEXT: [[SCEVGEP2:%.*]] = getelementptr i32, i32* [[X:%.*]], i64 [[INDVARS_IV]]
; REGS-NEXT: [[TMP:%.*]] = load i32, i32* [[SCEVGEP2]], align 4
; REGS-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[Y:%.*]], i64 [[INDVARS_IV]]
; REGS-NEXT: [[TMP1:%.*]] = load i32, i32* [[SCEVGEP1]], align 4
; REGS-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP]]
; REGS-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDVARS_IV]]
; REGS-NEXT: store i32 [[ADD]], i32* [[SCEVGEP]], align 4
; REGS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; REGS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
; REGS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
;
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movl 4096(%rsi,%rax), %ecx
; CHECK-NEXT: addl 4096(%rdi,%rax), %ecx
; CHECK-NEXT: movl %ecx, 4096(%rdx,%rax)
; CHECK-NEXT: addq $4, %rax
; CHECK-NEXT: jne .LBB0_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-NEXT: retq
entry:
br label %for.body
@@ -50,3 +98,4 @@ for.body: ; preds = %for.body, %entry
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}


@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-reduce -S < %s | FileCheck %s
; Check that when we use an outer-loop induction variable inside an inner-loop
; induction value expression, LSR can still choose to use a single induction
; variable for the inner loop and share it among multiple induction value exprs.
@@ -8,6 +9,46 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @foo(i32 %size, i32 %nsteps, i32 %hsize, i32* %lined, i8* %maxarray) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP215:%.*]] = icmp sgt i32 [[SIZE:%.*]], 1
; CHECK-NEXT: [[T0:%.*]] = zext i32 [[SIZE]] to i64
; CHECK-NEXT: [[T1:%.*]] = sext i32 [[NSTEPS:%.*]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[T0]], -1
; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to i8*
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], [[FOR_INC:%.*]] ], [ 1, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV13:%.*]] = inttoptr i64 [[LSR_IV1]] to i8*
; CHECK-NEXT: br i1 [[CMP215]], label [[FOR_BODY2_PREHEADER:%.*]], label [[FOR_INC]]
; CHECK: for.body2.preheader:
; CHECK-NEXT: br label [[FOR_BODY2:%.*]]
; CHECK: for.body2:
; CHECK-NEXT: [[LSR_IV4:%.*]] = phi i8* [ [[SCEVGEP:%.*]], [[FOR_BODY2]] ], [ [[MAXARRAY:%.*]], [[FOR_BODY2_PREHEADER]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY2]] ], [ [[TMP0]], [[FOR_BODY2_PREHEADER]] ]
; CHECK-NEXT: [[LSR_IV45:%.*]] = ptrtoint i8* [[LSR_IV4]] to i64
; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, i8* [[LSR_IV4]], i64 1
; CHECK-NEXT: [[V1:%.*]] = load i8, i8* [[SCEVGEP8]], align 1
; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i8, i8* [[TMP1]], i64 [[LSR_IV45]]
; CHECK-NEXT: [[V2:%.*]] = load i8, i8* [[SCEVGEP7]], align 1
; CHECK-NEXT: [[TMPV:%.*]] = xor i8 [[V1]], [[V2]]
; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, i8* [[LSR_IV13]], i64 [[LSR_IV45]]
; CHECK-NEXT: store i8 [[TMPV]], i8* [[SCEVGEP6]], align 1
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, i8* [[LSR_IV4]], i64 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[LSR_IV_NEXT]], 0
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY2]], label [[FOR_INC_LOOPEXIT:%.*]]
; CHECK: for.inc.loopexit:
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
; CHECK-NEXT: [[LSR_IV_NEXT2]] = add nuw nsw i64 [[LSR_IV1]], [[T0]]
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT3]], [[T1]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: ret void
;
entry:
%cmp215 = icmp sgt i32 %size, 1
%t0 = zext i32 %size to i64
@@ -25,20 +66,6 @@ for.body2.preheader: ; preds = %for.body
; Check that LSR only generates two induction variables for for.body2: one for
; the compare and one shared by multiple array accesses.
; CHECK: for.body2:
; CHECK-NEXT: [[LSRAR:%[^,]+]] = phi i8* [ %scevgep, %for.body2 ], [ %maxarray, %for.body2.preheader ]
; CHECK-NEXT: [[LSR:%[^,]+]] = phi i64 [ %lsr.iv.next, %for.body2 ], [ %0, %for.body2.preheader ]
; CHECK-NOT: = phi i64 [ {{.*}}, %for.body2 ], [ {{.*}}, %for.body2.preheader ]
; CHECK: [[LSRINT:%[^,]+]] = ptrtoint i8* [[LSRAR]] to i64
; CHECK: [[SCEVGEP1:%[^,]+]] = getelementptr i8, i8* [[LSRAR]], i64 1
; CHECK: {{.*}} = load i8, i8* [[SCEVGEP1]], align 1
; CHECK: [[SCEVGEP2:%[^,]+]] = getelementptr i8, i8* %1, i64 [[LSRINT]]
; CHECK: {{.*}} = load i8, i8* [[SCEVGEP2]], align 1
; CHECK: [[SCEVGEP3:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSRINT]]
; CHECK: store i8 {{.*}}, i8* [[SCEVGEP3]], align 1
; CHECK: [[LSRNEXT:%[^,]+]] = add i64 [[LSR]], -1
; CHECK: %exitcond = icmp ne i64 [[LSRNEXT]], 0
; CHECK: br i1 %exitcond, label %for.body2, label %for.inc.loopexit
for.body2: ; preds = %for.body2.preheader, %for.body2
%indvars.iv = phi i64 [ 1, %for.body2.preheader ], [ %indvars.iv.next, %for.body2 ]
@@ -67,3 +94,4 @@ for.inc: ; preds = %for.inc.loopexit, %
for.end.loopexit: ; preds = %for.inc
ret void
}