llvm-project/llvm/test/CodeGen/X86/lea-opt-cse2.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown   | FileCheck %s -check-prefix=X86

; Record of five i32 fields. @foo in this file reads fields 0 and 4 and
; writes fields 3 and 4; fields 1 and 2 are not accessed by it.
%struct.SA = type { i32 , i32 , i32 , i32 , i32};

; @foo(%ctx, %n): single-block loop driven by a counter that starts at %n and
; is decremented once per iteration; the body always runs at least once
; because %entry branches straight into %loop.
;
; Per iteration:
;   %add4  = ctx.field0 + 1 + ctx.field4   -> stored to ctx.field3
;   %add29 = %add4 + ctx.field4            -> only used after the loop
; After the loop, %add29 is stored to ctx.field4.
;
; The autogenerated assertions below expect, for both targets, a single LEA
; (base + index + 1) for %add4 inside the loop, and an ADD followed by a second
; LEA in the exit block to form %add29.
; NOTE(review): attribute group #0 is referenced here but no definition is
; visible in this part of the file - confirm that is intended.
define void @foo(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0 {
; X64-LABEL: foo:
; X64:       # %bb.0: # %entry
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  .LBB0_1: # %loop
; X64-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    movl 16(%rdi), %ecx
; X64-NEXT:    leal 1(%rax,%rcx), %edx
; X64-NEXT:    movl %edx, 12(%rdi)
; X64-NEXT:    decl %esi
; X64-NEXT:    jne .LBB0_1
; X64-NEXT:  # %bb.2: # %exit
; X64-NEXT:    addl %ecx, %eax
; X64-NEXT:    leal 1(%rcx,%rax), %eax
; X64-NEXT:    movl %eax, 16(%rdi)
; X64-NEXT:    retq
;
; X86-LABEL: foo:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %edi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    .cfi_offset %esi, -12
; X86-NEXT:    .cfi_offset %edi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    .p2align 4, 0x90
; X86-NEXT:  .LBB0_1: # %loop
; X86-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-NEXT:    movl (%eax), %edx
; X86-NEXT:    movl 16(%eax), %esi
; X86-NEXT:    leal 1(%edx,%esi), %edi
; X86-NEXT:    movl %edi, 12(%eax)
; X86-NEXT:    decl %ecx
; X86-NEXT:    jne .LBB0_1
; X86-NEXT:  # %bb.2: # %exit
; X86-NEXT:    addl %esi, %edx
; X86-NEXT:    leal 1(%esi,%edx), %ecx
; X86-NEXT:    movl %ecx, 16(%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %edi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
 entry:
   br label %loop

 loop:
   ; Down-counter: %n on entry from %entry, otherwise the decremented value
   ; produced at the bottom of the previous iteration.
   %iter = phi i32 [%n ,%entry ] ,[ %iter.ctr ,%loop]
   ; Load field 0 of %ctx.
   %h0 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 0
   %0 = load i32, i32* %h0, align 8
   ; Addresses of field 3 (store target in the loop) and field 4 (loaded here,
   ; stored in %exit).
   %h3 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 3
   %h4 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 4
   %1 = load i32, i32* %h4, align 8
   ; %add4 = field0 + 1 + field4; written back to field 3 every iteration.
   %add = add i32 %0, 1
   %add4 = add i32 %add, %1
   store i32 %add4, i32* %h3, align 4
   ; %add29 = %add4 + field4; has no use inside the loop, only in %exit.
   %add29 = add i32 %add4, %1
   ; Decrement the counter and keep looping while it is non-zero.
   %iter.ctr = sub i32 %iter , 1
   %res = icmp ne i32 %iter.ctr , 0
   br i1 %res , label %loop , label %exit

 exit:
   ; Store the value computed in the final loop iteration to field 4.
   store i32 %add29, i32* %h4, align 8
   ret void
}
[X86] Adding test cases for LEA factorization (PR32755 / D35014) Differential Revision: https://reviews.llvm.org/D35886 llvm-svn: 309262 2017-07-27 18:36:09 +08:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
Revert "[X86] Improvement in CodeGen instruction selection for LEAs." This reverts r319543, due to ASan bot breakage. llvm-svn: 319591 2017-12-02 06:20:26 +08:00			`; RUN: llc < %s -mtriple=x86_64-unknown \| FileCheck %s -check-prefix=X64`
			`; RUN: llc < %s -mtriple=i686-unknown \| FileCheck %s -check-prefix=X86`
[X86] Adding test cases for LEA factorization (PR32755 / D35014) Differential Revision: https://reviews.llvm.org/D35886 llvm-svn: 309262 2017-07-27 18:36:09 +08:00
			`%struct.SA = type { i32 , i32 , i32 , i32 , i32};`

			`define void @foo(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0 {`
			`; X64-LABEL: foo:`
[CodeGen] Unify MBB reference format in both MIR and debug output As part of the unification of the debug format and the MIR format, print MBB references as '%bb.5'. The MIR printer prints the IR name of a MBB only for block definitions. * find . \( -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#" << ([a-zA-Z0-9_]+)->getNumber\(\)/" << printMBBReference(\1)/g' find . \( -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#" << ([a-zA-Z0-9_]+)\.getNumber\(\)/" << printMBBReference(\1)/g' * find . \( -name ".txt" -o -name ".s" -o -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#([0-9]+)/%bb.\1/g' * grep -nr 'BB#' and fix Differential Revision: https://reviews.llvm.org/D40422 llvm-svn: 319665 2017-12-05 01:18:51 +08:00			`; X64: # %bb.0: # %entry`
[X86] Adding test cases for LEA factorization (PR32755 / D35014) Differential Revision: https://reviews.llvm.org/D35886 llvm-svn: 309262 2017-07-27 18:36:09 +08:00			`; X64-NEXT: .p2align 4, 0x90`
			`; X64-NEXT: .LBB0_1: # %loop`
			`; X64-NEXT: # =>This Inner Loop Header: Depth=1`
Revert "[X86] Improvement in CodeGen instruction selection for LEAs." This reverts r319543, due to ASan bot breakage. llvm-svn: 319591 2017-12-02 06:20:26 +08:00			`; X64-NEXT: movl (%rdi), %eax`
			`; X64-NEXT: movl 16(%rdi), %ecx`
			`; X64-NEXT: leal 1(%rax,%rcx), %edx`
			`; X64-NEXT: movl %edx, 12(%rdi)`
[X86] Adding test cases for LEA factorization (PR32755 / D35014) Differential Revision: https://reviews.llvm.org/D35886 llvm-svn: 309262 2017-07-27 18:36:09 +08:00			`; X64-NEXT: decl %esi`
			`; X64-NEXT: jne .LBB0_1`
[CodeGen] Unify MBB reference format in both MIR and debug output As part of the unification of the debug format and the MIR format, print MBB references as '%bb.5'. The MIR printer prints the IR name of a MBB only for block definitions. * find . \( -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#" << ([a-zA-Z0-9_]+)->getNumber\(\)/" << printMBBReference(\1)/g' find . \( -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#" << ([a-zA-Z0-9_]+)\.getNumber\(\)/" << printMBBReference(\1)/g' * find . \( -name ".txt" -o -name ".s" -o -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#([0-9]+)/%bb.\1/g' * grep -nr 'BB#' and fix Differential Revision: https://reviews.llvm.org/D40422 llvm-svn: 319665 2017-12-05 01:18:51 +08:00			`; X64-NEXT: # %bb.2: # %exit`
Revert "[X86] Improvement in CodeGen instruction selection for LEAs." This reverts r319543, due to ASan bot breakage. llvm-svn: 319591 2017-12-02 06:20:26 +08:00			`; X64-NEXT: addl %ecx, %eax`
			`; X64-NEXT: leal 1(%rcx,%rax), %eax`
			`; X64-NEXT: movl %eax, 16(%rdi)`
[X86] Adding test cases for LEA factorization (PR32755 / D35014) Differential Revision: https://reviews.llvm.org/D35886 llvm-svn: 309262 2017-07-27 18:36:09 +08:00			`; X64-NEXT: retq`
			`;`
			`; X86-LABEL: foo:`
[CodeGen] Unify MBB reference format in both MIR and debug output As part of the unification of the debug format and the MIR format, print MBB references as '%bb.5'. The MIR printer prints the IR name of a MBB only for block definitions. * find . \( -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#" << ([a-zA-Z0-9_]+)->getNumber\(\)/" << printMBBReference(\1)/g' find . \( -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#" << ([a-zA-Z0-9_]+)\.getNumber\(\)/" << printMBBReference(\1)/g' * find . \( -name ".txt" -o -name ".s" -o -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#([0-9]+)/%bb.\1/g' * grep -nr 'BB#' and fix Differential Revision: https://reviews.llvm.org/D40422 llvm-svn: 319665 2017-12-05 01:18:51 +08:00			`; X86: # %bb.0: # %entry`
Revert "[X86] Improvement in CodeGen instruction selection for LEAs." This reverts r319543, due to ASan bot breakage. llvm-svn: 319591 2017-12-02 06:20:26 +08:00			`; X86-NEXT: pushl %edi`
[X86] Improvement in CodeGen instruction selection for LEAs. Summary: 1/ Operand folding during complex pattern matching for LEAs has been extended, such that it promotes Scale to accommodate similar operand appearing in the DAG e.g. T1 = A + B T2 = T1 + 10 T3 = T2 + A For above DAG rooted at T3, X86AddressMode will now look like Base = B , Index = A , Scale = 2 , Disp = 10 2/ During OptimizeLEAPass down the pipeline factorization is now performed over LEAs so that if there is an opportunity then complex LEAs (having 3 operands) could be factored out e.g. leal 1(%rax,%rcx,1), %rdx leal 1(%rax,%rcx,2), %rcx will be factored as following leal 1(%rax,%rcx,1), %rdx leal (%rdx,%rcx) , %edx 3/ Aggressive operand folding for AM based selection for LEAs is sensitive to loops, thus avoiding creation of any complex LEAs within a loop. 4/ Simplify LEA converts (lea (BASE,1,INDEX,0) --> add (BASE, INDEX)) which offers better throughput. PR32755 will be taken care of by this patch. Previous patch revisions : r313343 , r314886 Reviewers: lsaba, RKSimon, craig.topper, qcolombet, jmolloy, jbhateja Reviewed By: lsaba, RKSimon, jbhateja Subscribers: jmolloy, spatel, igorb, llvm-commits Differential Revision: https://reviews.llvm.org/D35014 llvm-svn: 319543 2017-12-01 22:07:38 +08:00			`; X86-NEXT: .cfi_def_cfa_offset 8`
Revert "[X86] Improvement in CodeGen instruction selection for LEAs." This reverts r319543, due to ASan bot breakage. llvm-svn: 319591 2017-12-02 06:20:26 +08:00			`; X86-NEXT: pushl %esi`
			`; X86-NEXT: .cfi_def_cfa_offset 12`
			`; X86-NEXT: .cfi_offset %esi, -12`
			`; X86-NEXT: .cfi_offset %edi, -8`
[X86] Adding test cases for LEA factorization (PR32755 / D35014) Differential Revision: https://reviews.llvm.org/D35886 llvm-svn: 309262 2017-07-27 18:36:09 +08:00			`; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx`
			`; X86-NEXT: movl {{[0-9]+}}(%esp), %eax`
			`; X86-NEXT: .p2align 4, 0x90`
			`; X86-NEXT: .LBB0_1: # %loop`
			`; X86-NEXT: # =>This Inner Loop Header: Depth=1`
Revert "[X86] Improvement in CodeGen instruction selection for LEAs." This reverts r319543, due to ASan bot breakage. llvm-svn: 319591 2017-12-02 06:20:26 +08:00			`; X86-NEXT: movl (%eax), %edx`
			`; X86-NEXT: movl 16(%eax), %esi`
			`; X86-NEXT: leal 1(%edx,%esi), %edi`
			`; X86-NEXT: movl %edi, 12(%eax)`
[X86] Adding test cases for LEA factorization (PR32755 / D35014) Differential Revision: https://reviews.llvm.org/D35886 llvm-svn: 309262 2017-07-27 18:36:09 +08:00			`; X86-NEXT: decl %ecx`
			`; X86-NEXT: jne .LBB0_1`
[CodeGen] Unify MBB reference format in both MIR and debug output As part of the unification of the debug format and the MIR format, print MBB references as '%bb.5'. The MIR printer prints the IR name of a MBB only for block definitions. * find . \( -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#" << ([a-zA-Z0-9_]+)->getNumber\(\)/" << printMBBReference(\1)/g' find . \( -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#" << ([a-zA-Z0-9_]+)\.getNumber\(\)/" << printMBBReference(\1)/g' * find . \( -name ".txt" -o -name ".s" -o -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#([0-9]+)/%bb.\1/g' * grep -nr 'BB#' and fix Differential Revision: https://reviews.llvm.org/D40422 llvm-svn: 319665 2017-12-05 01:18:51 +08:00			`; X86-NEXT: # %bb.2: # %exit`
Revert "[X86] Improvement in CodeGen instruction selection for LEAs." This reverts r319543, due to ASan bot breakage. llvm-svn: 319591 2017-12-02 06:20:26 +08:00			`; X86-NEXT: addl %esi, %edx`
			`; X86-NEXT: leal 1(%esi,%edx), %ecx`
			`; X86-NEXT: movl %ecx, 16(%eax)`
[X86] Adding test cases for LEA factorization (PR32755 / D35014) Differential Revision: https://reviews.llvm.org/D35886 llvm-svn: 309262 2017-07-27 18:36:09 +08:00			`; X86-NEXT: popl %esi`
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706 2018-04-24 18:32:08 +08:00			`; X86-NEXT: .cfi_def_cfa_offset 8`
Revert "[X86] Improvement in CodeGen instruction selection for LEAs." This reverts r319543, due to ASan bot breakage. llvm-svn: 319591 2017-12-02 06:20:26 +08:00			`; X86-NEXT: popl %edi`
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706 2018-04-24 18:32:08 +08:00			`; X86-NEXT: .cfi_def_cfa_offset 4`
[X86] Adding test cases for LEA factorization (PR32755 / D35014) Differential Revision: https://reviews.llvm.org/D35886 llvm-svn: 309262 2017-07-27 18:36:09 +08:00			`; X86-NEXT: retl`
			`entry:`
			`br label %loop`

			`loop:`
			`%iter = phi i32 [%n ,%entry ] ,[ %iter.ctr ,%loop]`
			`%h0 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 0`
			`%0 = load i32, i32* %h0, align 8`
			`%h3 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 3`
			`%h4 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 4`
			`%1 = load i32, i32* %h4, align 8`
			`%add = add i32 %0, 1`
			`%add4 = add i32 %add, %1`
			`store i32 %add4, i32* %h3, align 4`
			`%add29 = add i32 %add4, %1`
			`%iter.ctr = sub i32 %iter , 1`
			`%res = icmp ne i32 %iter.ctr , 0`
			`br i1 %res , label %loop , label %exit`

			`exit:`
			`store i32 %add29, i32* %h4, align 8`
			`ret void`
			`}`