llvm-project/llvm/test/CodeGen/X86/wide-integer-cmp.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-linux-gnu %s -o - | FileCheck %s

; Equality compare of two i64 values feeding a conditional branch, compiled for
; the 32-bit i686 triple given on the llc command line at the top of this file.
; Expected lowering: two 32-bit stack words are loaded, each is XORed with
; another stack word, the two results are ORed together, and one jne skips to
; bb2. Stack offsets are matched by regex, so the assertions do not pin which
; word is paired with which.
; Returns 1 when %a == %b, otherwise 2.
define i32 @branch_eq(i64 %a, i64 %b) {
; CHECK-LABEL: branch_eq:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    jne .LBB0_2
; CHECK-NEXT:  # BB#1: # %bb1
; CHECK-NEXT:    movl $1, %eax
; CHECK-NEXT:    retl
; CHECK-NEXT:  .LBB0_2: # %bb2
; CHECK-NEXT:    movl $2, %eax
; CHECK-NEXT:    retl
entry:
  %cmp = icmp eq i64 %a, %b
	br i1 %cmp, label %bb1, label %bb2
bb1:
  ret i32 1
bb2:
  ret i32 2
}

; Signed less-than compare of two i64 values feeding a conditional branch.
; Expected lowering: a cmpl on one pair of 32-bit words followed by an sbbl on
; the other pair (subtract with borrow), then a single jge that skips to bb2.
; No setcc or second compare is expected between the sbbl and the branch.
; Returns 1 when %a < %b (signed), otherwise 2.
define i32 @branch_slt(i64 %a, i64 %b) {
; CHECK-LABEL: branch_slt:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    jge .LBB1_2
; CHECK-NEXT:  # BB#1: # %bb1
; CHECK-NEXT:    movl $1, %eax
; CHECK-NEXT:    retl
; CHECK-NEXT:  .LBB1_2: # %bb2
; CHECK-NEXT:    movl $2, %eax
; CHECK-NEXT:    retl
entry:
  %cmp = icmp slt i64 %a, %b
	br i1 %cmp, label %bb1, label %bb2
bb1:
  ret i32 1
bb2:
  ret i32 2
}

; Unsigned less-or-equal compare of two i64 values feeding a conditional branch.
; Expected lowering: the same cmpl/sbbl pair as in branch_slt, but the jump to
; bb2 uses the unsigned condition jb instead of the signed jge.
; NOTE(review): branching to the false block on "below" suggests the operands
; are subtracted in swapped order (%b - %a); the stack offsets are matched by
; regex, so the assertions themselves do not verify the operand order.
; Returns 1 when %a <= %b (unsigned), otherwise 2.
define i32 @branch_ule(i64 %a, i64 %b) {
; CHECK-LABEL: branch_ule:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    jb .LBB2_2
; CHECK-NEXT:  # BB#1: # %bb1
; CHECK-NEXT:    movl $1, %eax
; CHECK-NEXT:    retl
; CHECK-NEXT:  .LBB2_2: # %bb2
; CHECK-NEXT:    movl $2, %eax
; CHECK-NEXT:    retl
entry:
  %cmp = icmp ule i64 %a, %b
	br i1 %cmp, label %bb1, label %bb2
bb1:
  ret i32 1
bb2:
  ret i32 2
}

; Signed greater-than compare of two i64 values whose result is materialised
; as an integer instead of being branched on.
; Expected lowering: cmpl/sbbl across the two 32-bit word pairs, then setl into
; %al and movzbl to widen it to the i32 return value; no jump is expected.
; NOTE(review): using setl for a "greater than" predicate suggests the operands
; are subtracted in swapped order; the regex-matched stack offsets do not
; verify that.
; Returns 1 when %a > %b (signed), otherwise 0.
define i32 @set_gt(i64 %a, i64 %b) {
; CHECK-LABEL: set_gt:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    setl %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
entry:
  %cmp = icmp sgt i64 %a, %b
  %res = select i1 %cmp, i32 1, i32 0
  ret i32 %res
}

; Signed less-than compare of two i128 values feeding a conditional branch.
; Expected lowering: four 32-bit words are loaded into %eax, %ecx, %edx and
; %esi, then one cmpl followed by a chain of three sbbl instructions carries
; the borrow through all four words before a single jge skips to bb2.
; %esi is pushed on entry and popped on both return paths. The expected output
; also pins the CFI directives: CFA offset 8 after the push, offset 4 after
; each pop, and offset 8 restated at the start of the bb2 block.
; Returns 1 when %a < %b (signed), otherwise 2.
define i32 @test_wide(i128 %a, i128 %b) {
; CHECK-LABEL: test_wide:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    .cfi_offset %esi, -8
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    cmpl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    sbbl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    jge .LBB4_2
; CHECK-NEXT:  # BB#1: # %bb1
; CHECK-NEXT:    movl $1, %eax
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    .cfi_def_cfa_offset 4
; CHECK-NEXT:    retl
; CHECK-NEXT:  .LBB4_2: # %bb2
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    movl $2, %eax
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    .cfi_def_cfa_offset 4
; CHECK-NEXT:    retl
entry:
  %cmp = icmp slt i128 %a, %b
	br i1 %cmp, label %bb1, label %bb2
bb1:
  ret i32 1
bb2:
  ret i32 2
}

; The comparison of the low bits will be folded to a CARRY_FALSE node. Make
; sure the code can handle that.
;
; Both operands are ANDed with -4294967296 (0xffffffff00000000), which clears
; their low 32 bits before the signed less-than compare. The expected output
; is therefore a single load and a single cmpl followed by jge, with no sbbl.
; Returns 1 when the masked %a is less than the masked %b (signed), otherwise 2.
define i32 @test_carry_false(i64 %a, i64 %b) {
; CHECK-LABEL: test_carry_false:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    jge .LBB5_2
; CHECK-NEXT:  # BB#1: # %bb1
; CHECK-NEXT:    movl $1, %eax
; CHECK-NEXT:    retl
; CHECK-NEXT:  .LBB5_2: # %bb2
; CHECK-NEXT:    movl $2, %eax
; CHECK-NEXT:    retl
entry:
  %x = and i64 %a, -4294967296 ;0xffffffff00000000
  %y = and i64 %b, -4294967296
  %cmp = icmp slt i64 %x, %y
	br i1 %cmp, label %bb1, label %bb2
bb1:
  ret i32 1
bb2:
  ret i32 2
}
Regen expected tests result. NFC llvm-svn: 294866 2017-02-12 03:27:15 +08:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
X86: More efficient legalization of wide integer compares In particular, this makes the code for 64-bit compares on 32-bit targets much more efficient. Example: define i32 @test_slt(i64 %a, i64 %b) { entry: %cmp = icmp slt i64 %a, %b br i1 %cmp, label %bb1, label %bb2 bb1: ret i32 1 bb2: ret i32 2 } Before this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax setae %al cmpl 16(%esp), %ecx setge %cl je .LBB2_2 movb %cl, %al .LBB2_2: testb %al, %al jne .LBB2_4 movl $1, %eax retl .LBB2_4: movl $2, %eax retl After this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax sbbl 16(%esp), %ecx jge .LBB1_2 movl $1, %eax retl .LBB1_2: movl $2, %eax retl Differential Revision: http://reviews.llvm.org/D14496 llvm-svn: 253572 2015-11-20 00:35:08 +08:00			`; RUN: llc -mtriple=i686-linux-gnu %s -o - \| FileCheck %s`

			`define i32 @branch_eq(i64 %a, i64 %b) {`
Regen expected tests result. NFC llvm-svn: 294866 2017-02-12 03:27:15 +08:00			`; CHECK-LABEL: branch_eq:`
			`; CHECK: # BB#0: # %entry`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx`
			`; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %ecx`
			`; CHECK-NEXT: xorl {{[0-9]+}}(%esp), %eax`
			`; CHECK-NEXT: orl %ecx, %eax`
			`; CHECK-NEXT: jne .LBB0_2`
			`; CHECK-NEXT: # BB#1: # %bb1`
			`; CHECK-NEXT: movl $1, %eax`
			`; CHECK-NEXT: retl`
			`; CHECK-NEXT: .LBB0_2: # %bb2`
			`; CHECK-NEXT: movl $2, %eax`
			`; CHECK-NEXT: retl`
X86: More efficient legalization of wide integer compares In particular, this makes the code for 64-bit compares on 32-bit targets much more efficient. Example: define i32 @test_slt(i64 %a, i64 %b) { entry: %cmp = icmp slt i64 %a, %b br i1 %cmp, label %bb1, label %bb2 bb1: ret i32 1 bb2: ret i32 2 } Before this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax setae %al cmpl 16(%esp), %ecx setge %cl je .LBB2_2 movb %cl, %al .LBB2_2: testb %al, %al jne .LBB2_4 movl $1, %eax retl .LBB2_4: movl $2, %eax retl After this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax sbbl 16(%esp), %ecx jge .LBB1_2 movl $1, %eax retl .LBB1_2: movl $2, %eax retl Differential Revision: http://reviews.llvm.org/D14496 llvm-svn: 253572 2015-11-20 00:35:08 +08:00			`entry:`
			`%cmp = icmp eq i64 %a, %b`
			`br i1 %cmp, label %bb1, label %bb2`
			`bb1:`
			`ret i32 1`
			`bb2:`
			`ret i32 2`
			`}`

			`define i32 @branch_slt(i64 %a, i64 %b) {`
Regen expected tests result. NFC llvm-svn: 294866 2017-02-12 03:27:15 +08:00			`; CHECK-LABEL: branch_slt:`
			`; CHECK: # BB#0: # %entry`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx`
			`; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax`
			`; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx`
			`; CHECK-NEXT: jge .LBB1_2`
			`; CHECK-NEXT: # BB#1: # %bb1`
			`; CHECK-NEXT: movl $1, %eax`
			`; CHECK-NEXT: retl`
			`; CHECK-NEXT: .LBB1_2: # %bb2`
			`; CHECK-NEXT: movl $2, %eax`
			`; CHECK-NEXT: retl`
X86: More efficient legalization of wide integer compares In particular, this makes the code for 64-bit compares on 32-bit targets much more efficient. Example: define i32 @test_slt(i64 %a, i64 %b) { entry: %cmp = icmp slt i64 %a, %b br i1 %cmp, label %bb1, label %bb2 bb1: ret i32 1 bb2: ret i32 2 } Before this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax setae %al cmpl 16(%esp), %ecx setge %cl je .LBB2_2 movb %cl, %al .LBB2_2: testb %al, %al jne .LBB2_4 movl $1, %eax retl .LBB2_4: movl $2, %eax retl After this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax sbbl 16(%esp), %ecx jge .LBB1_2 movl $1, %eax retl .LBB1_2: movl $2, %eax retl Differential Revision: http://reviews.llvm.org/D14496 llvm-svn: 253572 2015-11-20 00:35:08 +08:00			`entry:`
			`%cmp = icmp slt i64 %a, %b`
			`br i1 %cmp, label %bb1, label %bb2`
			`bb1:`
			`ret i32 1`
			`bb2:`
			`ret i32 2`
			`}`

			`define i32 @branch_ule(i64 %a, i64 %b) {`
Regen expected tests result. NFC llvm-svn: 294866 2017-02-12 03:27:15 +08:00			`; CHECK-LABEL: branch_ule:`
			`; CHECK: # BB#0: # %entry`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx`
			`; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax`
			`; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx`
			`; CHECK-NEXT: jb .LBB2_2`
			`; CHECK-NEXT: # BB#1: # %bb1`
			`; CHECK-NEXT: movl $1, %eax`
			`; CHECK-NEXT: retl`
			`; CHECK-NEXT: .LBB2_2: # %bb2`
			`; CHECK-NEXT: movl $2, %eax`
			`; CHECK-NEXT: retl`
X86: More efficient legalization of wide integer compares In particular, this makes the code for 64-bit compares on 32-bit targets much more efficient. Example: define i32 @test_slt(i64 %a, i64 %b) { entry: %cmp = icmp slt i64 %a, %b br i1 %cmp, label %bb1, label %bb2 bb1: ret i32 1 bb2: ret i32 2 } Before this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax setae %al cmpl 16(%esp), %ecx setge %cl je .LBB2_2 movb %cl, %al .LBB2_2: testb %al, %al jne .LBB2_4 movl $1, %eax retl .LBB2_4: movl $2, %eax retl After this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax sbbl 16(%esp), %ecx jge .LBB1_2 movl $1, %eax retl .LBB1_2: movl $2, %eax retl Differential Revision: http://reviews.llvm.org/D14496 llvm-svn: 253572 2015-11-20 00:35:08 +08:00			`entry:`
			`%cmp = icmp ule i64 %a, %b`
			`br i1 %cmp, label %bb1, label %bb2`
			`bb1:`
			`ret i32 1`
			`bb2:`
			`ret i32 2`
			`}`

			`define i32 @set_gt(i64 %a, i64 %b) {`
Regen expected tests result. NFC llvm-svn: 294866 2017-02-12 03:27:15 +08:00			`; CHECK-LABEL: set_gt:`
			`; CHECK: # BB#0: # %entry`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx`
			`; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax`
			`; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx`
			`; CHECK-NEXT: setl %al`
			`; CHECK-NEXT: movzbl %al, %eax`
			`; CHECK-NEXT: retl`
X86: More efficient legalization of wide integer compares In particular, this makes the code for 64-bit compares on 32-bit targets much more efficient. Example: define i32 @test_slt(i64 %a, i64 %b) { entry: %cmp = icmp slt i64 %a, %b br i1 %cmp, label %bb1, label %bb2 bb1: ret i32 1 bb2: ret i32 2 } Before this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax setae %al cmpl 16(%esp), %ecx setge %cl je .LBB2_2 movb %cl, %al .LBB2_2: testb %al, %al jne .LBB2_4 movl $1, %eax retl .LBB2_4: movl $2, %eax retl After this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax sbbl 16(%esp), %ecx jge .LBB1_2 movl $1, %eax retl .LBB1_2: movl $2, %eax retl Differential Revision: http://reviews.llvm.org/D14496 llvm-svn: 253572 2015-11-20 00:35:08 +08:00			`entry:`
			`%cmp = icmp sgt i64 %a, %b`
			`%res = select i1 %cmp, i32 1, i32 0`
			`ret i32 %res`
			`}`

			`define i32 @test_wide(i128 %a, i128 %b) {`
Regen expected tests result. NFC llvm-svn: 294866 2017-02-12 03:27:15 +08:00			`; CHECK-LABEL: test_wide:`
			`; CHECK: # BB#0: # %entry`
			`; CHECK-NEXT: pushl %esi`
			`; CHECK-NEXT: .cfi_def_cfa_offset 8`
			`; CHECK-NEXT: .cfi_offset %esi, -8`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi`
			`; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %edx`
			`; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %esi`
Revert "[DAG] Rewrite areNonVolatileConsecutiveLoads to use BaseIndexOffset" This reverts commit r306819 which appears be exposing underlying issues in a stage1 ppc64be build llvm-svn: 306820 2017-06-30 20:56:02 +08:00			`; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx`
Rewrite areNonVolatileConsecutiveLoads to use BaseIndexOffset Relanding after rewriting undef.ll test to avoid host-dependant endianness. As discussed in D34087, rewrite areNonVolatileConsecutiveLoads using generic checks. Also, propagate missing local handling from there to BaseIndexOffset checks. Tests of note: * test/CodeGen/X86/build-vector* - Improved. * test/CodeGen/BPF/undef.ll - Improved store alignment allows an additional store merge * test/CodeGen/X86/clear_upper_vector_element_bits.ll - This is a case we already do not handle well. Here, the DAG is improved, but scheduling causes a code size degradation. Reviewers: RKSimon, craig.topper, spatel, andreadb, filcab Subscribers: nemanjai, llvm-commits Differential Revision: https://reviews.llvm.org/D34472 llvm-svn: 307114 2017-07-05 09:21:23 +08:00			`; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %eax`
Regen expected tests result. NFC llvm-svn: 294866 2017-02-12 03:27:15 +08:00			`; CHECK-NEXT: jge .LBB4_2`
			`; CHECK-NEXT: # BB#1: # %bb1`
			`; CHECK-NEXT: movl $1, %eax`
			`; CHECK-NEXT: popl %esi`
Reland "Correct dwarf unwind information in function epilogue for X86" Reland r317100 with minor fix regarding ComputeCommonTailLength function in BranchFolding.cpp. Skipping top CFI instructions block needs to executed on several more return points in ComputeCommonTailLength(). Original r317100 message: "Correct dwarf unwind information in function epilogue for X86" This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: - CFI instructions do not affect code generation - Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Changed CFI instructions so that they: - are duplicable - are not counted as instructions when tail duplicating or tail merging - can be compared as equal Added CFIInstrInserter pass: - analyzes each basic block to determine cfa offset and register valid at its entry and exit - verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors - inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. 
llvm-svn: 317579 2017-11-07 22:40:27 +08:00			`; CHECK-NEXT: .cfi_def_cfa_offset 4`
Regen expected tests result. NFC llvm-svn: 294866 2017-02-12 03:27:15 +08:00			`; CHECK-NEXT: retl`
			`; CHECK-NEXT: .LBB4_2: # %bb2`
Reland "Correct dwarf unwind information in function epilogue for X86" Reland r317100 with minor fix regarding ComputeCommonTailLength function in BranchFolding.cpp. Skipping top CFI instructions block needs to executed on several more return points in ComputeCommonTailLength(). Original r317100 message: "Correct dwarf unwind information in function epilogue for X86" This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: - CFI instructions do not affect code generation - Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Changed CFI instructions so that they: - are duplicable - are not counted as instructions when tail duplicating or tail merging - can be compared as equal Added CFIInstrInserter pass: - analyzes each basic block to determine cfa offset and register valid at its entry and exit - verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors - inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. 
llvm-svn: 317579 2017-11-07 22:40:27 +08:00			`; CHECK-NEXT: .cfi_def_cfa_offset 8`
Regen expected tests result. NFC llvm-svn: 294866 2017-02-12 03:27:15 +08:00			`; CHECK-NEXT: movl $2, %eax`
			`; CHECK-NEXT: popl %esi`
Reland "Correct dwarf unwind information in function epilogue for X86" Reland r317100 with minor fix regarding ComputeCommonTailLength function in BranchFolding.cpp. Skipping top CFI instructions block needs to executed on several more return points in ComputeCommonTailLength(). Original r317100 message: "Correct dwarf unwind information in function epilogue for X86" This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: - CFI instructions do not affect code generation - Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Changed CFI instructions so that they: - are duplicable - are not counted as instructions when tail duplicating or tail merging - can be compared as equal Added CFIInstrInserter pass: - analyzes each basic block to determine cfa offset and register valid at its entry and exit - verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors - inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. 
llvm-svn: 317579 2017-11-07 22:40:27 +08:00			`; CHECK-NEXT: .cfi_def_cfa_offset 4`
Regen expected tests result. NFC llvm-svn: 294866 2017-02-12 03:27:15 +08:00			`; CHECK-NEXT: retl`
X86: More efficient legalization of wide integer compares In particular, this makes the code for 64-bit compares on 32-bit targets much more efficient. Example: define i32 @test_slt(i64 %a, i64 %b) { entry: %cmp = icmp slt i64 %a, %b br i1 %cmp, label %bb1, label %bb2 bb1: ret i32 1 bb2: ret i32 2 } Before this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax setae %al cmpl 16(%esp), %ecx setge %cl je .LBB2_2 movb %cl, %al .LBB2_2: testb %al, %al jne .LBB2_4 movl $1, %eax retl .LBB2_4: movl $2, %eax retl After this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax sbbl 16(%esp), %ecx jge .LBB1_2 movl $1, %eax retl .LBB1_2: movl $2, %eax retl Differential Revision: http://reviews.llvm.org/D14496 llvm-svn: 253572 2015-11-20 00:35:08 +08:00			`entry:`
			`%cmp = icmp slt i128 %a, %b`
			`br i1 %cmp, label %bb1, label %bb2`
			`bb1:`
			`ret i32 1`
			`bb2:`
			`ret i32 2`
			`}`

Regen expected tests result. NFC llvm-svn: 294866 2017-02-12 03:27:15 +08:00			`; The comparison of the low bits will be folded to a CARRY_FALSE node. Make`
			`; sure the code can handle that.`
X86: More efficient legalization of wide integer compares In particular, this makes the code for 64-bit compares on 32-bit targets much more efficient. Example: define i32 @test_slt(i64 %a, i64 %b) { entry: %cmp = icmp slt i64 %a, %b br i1 %cmp, label %bb1, label %bb2 bb1: ret i32 1 bb2: ret i32 2 } Before this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax setae %al cmpl 16(%esp), %ecx setge %cl je .LBB2_2 movb %cl, %al .LBB2_2: testb %al, %al jne .LBB2_4 movl $1, %eax retl .LBB2_4: movl $2, %eax retl After this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax sbbl 16(%esp), %ecx jge .LBB1_2 movl $1, %eax retl .LBB1_2: movl $2, %eax retl Differential Revision: http://reviews.llvm.org/D14496 llvm-svn: 253572 2015-11-20 00:35:08 +08:00			`define i32 @test_carry_false(i64 %a, i64 %b) {`
Regen expected tests result. NFC llvm-svn: 294866 2017-02-12 03:27:15 +08:00			`; CHECK-LABEL: test_carry_false:`
			`; CHECK: # BB#0: # %entry`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax`
			`; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax`
			`; CHECK-NEXT: jge .LBB5_2`
			`; CHECK-NEXT: # BB#1: # %bb1`
			`; CHECK-NEXT: movl $1, %eax`
			`; CHECK-NEXT: retl`
			`; CHECK-NEXT: .LBB5_2: # %bb2`
			`; CHECK-NEXT: movl $2, %eax`
			`; CHECK-NEXT: retl`
X86: More efficient legalization of wide integer compares In particular, this makes the code for 64-bit compares on 32-bit targets much more efficient. Example: define i32 @test_slt(i64 %a, i64 %b) { entry: %cmp = icmp slt i64 %a, %b br i1 %cmp, label %bb1, label %bb2 bb1: ret i32 1 bb2: ret i32 2 } Before this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax setae %al cmpl 16(%esp), %ecx setge %cl je .LBB2_2 movb %cl, %al .LBB2_2: testb %al, %al jne .LBB2_4 movl $1, %eax retl .LBB2_4: movl $2, %eax retl After this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax sbbl 16(%esp), %ecx jge .LBB1_2 movl $1, %eax retl .LBB1_2: movl $2, %eax retl Differential Revision: http://reviews.llvm.org/D14496 llvm-svn: 253572 2015-11-20 00:35:08 +08:00			`entry:`
			`%x = and i64 %a, -4294967296 ;0xffffffff00000000`
			`%y = and i64 %b, -4294967296`
			`%cmp = icmp slt i64 %x, %y`
			`br i1 %cmp, label %bb1, label %bb2`
			`bb1:`
			`ret i32 1`
			`bb2:`
			`ret i32 2`
			`}`