llvm-project/llvm/test/CodeGen/X86/sext-i1.ll

177 lines
4.2 KiB
LLVM
Raw Normal View History

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-unknown-unknown -disable-cgp-branch-opts | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-cgp-branch-opts | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; rdar://7573216
; PR6146
define i32 @t1(i32 %x) nounwind readnone ssp {
; X32-LABEL: t1:
; X32: # BB#0:
; X32-NEXT: cmpl $1, {{[0-9]+}}(%esp)
; X32-NEXT: sbbl %eax, %eax
; X32-NEXT: retl
;
; X64-LABEL: t1:
; X64: # BB#0:
; X64-NEXT: cmpl $1, %edi
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: retq
%t0 = icmp eq i32 %x, 0
%if = select i1 %t0, i32 -1, i32 0
ret i32 %if
}
define i32 @t2(i32 %x) nounwind readnone ssp {
; X32-LABEL: t2:
; X32: # BB#0:
; X32-NEXT: cmpl $1, {{[0-9]+}}(%esp)
; X32-NEXT: sbbl %eax, %eax
; X32-NEXT: retl
;
; X64-LABEL: t2:
; X64: # BB#0:
; X64-NEXT: cmpl $1, %edi
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: retq
%t0 = icmp eq i32 %x, 0
%if = sext i1 %t0 to i32
ret i32 %if
}
define i32 @t3() nounwind readonly {
; X32-LABEL: t3:
; X32: # BB#0: # %entry
; X32-NEXT: cmpl $1, %eax
; X32-NEXT: sbbl %eax, %eax
; X32-NEXT: cmpl %eax, %eax
; X32-NEXT: sbbl %eax, %eax
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: retl
;
; X64-LABEL: t3:
; X64: # BB#0: # %entry
[x86] use more shift or LEA for select-of-constants (2nd try) The previous rev (r310208) failed to account for overflow when subtracting the constants to see if they're suitable for shift/lea. This version add a check for that and more test were added in r310490. We can convert any select-of-constants to math ops: http://rise4fun.com/Alive/d7d For this patch, I'm enhancing an existing x86 transform that uses fake multiplies (they always become shl/lea) to avoid cmov or branching. The current code misses cases where we have a negative constant and a positive constant, so this is just trying to plug that hole. The DAGCombiner diff prevents us from hitting a terrible inefficiency: we can start with a select in IR, create a select DAG node, convert it into a sext, convert it back into a select, and then lower it to sext machine code. Some notes about the test diffs: 1. 2010-08-04-MaskedSignedCompare.ll - We were creating control flow that didn't exist in the IR. 2. memcmp.ll - Choose -1 or 1 is the case that got me looking at this again. We could avoid the push/pop in some cases if we used 'movzbl %al' instead of an xor on a different reg? That's a post-DAG problem though. 3. mul-constant-result.ll - The trade-off between sbb+not vs. setne+neg could be addressed if that's a regression, but those would always be nearly equivalent. 4. pr22338.ll and sext-i1.ll - These tests have undef operands, so we don't actually care about these diffs. 5. sbb.ll - This shows a win for what is likely a common case: choose -1 or 0. 6. select.ll - There's another borderline case here: cmp+sbb+or vs. test+set+lea? Also, sbb+not vs. setae+neg shows up again. 7. select_const.ll - These are motivating cases for the enhancement; replace cmov with cheaper ops. Assembly differences between movzbl and xor to avoid a partial reg stall are caused later by the X86 Fixup SetCC pass. Differential Revision: https://reviews.llvm.org/D35340 llvm-svn: 310717
2017-08-11 23:44:14 +08:00
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: testl %eax, %eax
; X64-NEXT: sete %al
; X64-NEXT: negq %rax
; X64-NEXT: cmpq %rax, %rax
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
entry:
%not.tobool = icmp eq i32 undef, 0
%cond = sext i1 %not.tobool to i32
%conv = sext i1 %not.tobool to i64
%add13 = add i64 0, %conv
%cmp = icmp ult i64 undef, %add13
br i1 %cmp, label %if.then, label %if.end
if.then:
br label %if.end
if.end:
%xor27 = xor i32 undef, %cond
ret i32 0
}
define i32 @t4(i64 %x) nounwind readnone ssp {
; X32-LABEL: t4:
; X32: # BB#0:
[x86] use more shift or LEA for select-of-constants (2nd try) The previous rev (r310208) failed to account for overflow when subtracting the constants to see if they're suitable for shift/lea. This version add a check for that and more test were added in r310490. We can convert any select-of-constants to math ops: http://rise4fun.com/Alive/d7d For this patch, I'm enhancing an existing x86 transform that uses fake multiplies (they always become shl/lea) to avoid cmov or branching. The current code misses cases where we have a negative constant and a positive constant, so this is just trying to plug that hole. The DAGCombiner diff prevents us from hitting a terrible inefficiency: we can start with a select in IR, create a select DAG node, convert it into a sext, convert it back into a select, and then lower it to sext machine code. Some notes about the test diffs: 1. 2010-08-04-MaskedSignedCompare.ll - We were creating control flow that didn't exist in the IR. 2. memcmp.ll - Choose -1 or 1 is the case that got me looking at this again. We could avoid the push/pop in some cases if we used 'movzbl %al' instead of an xor on a different reg? That's a post-DAG problem though. 3. mul-constant-result.ll - The trade-off between sbb+not vs. setne+neg could be addressed if that's a regression, but those would always be nearly equivalent. 4. pr22338.ll and sext-i1.ll - These tests have undef operands, so we don't actually care about these diffs. 5. sbb.ll - This shows a win for what is likely a common case: choose -1 or 0. 6. select.ll - There's another borderline case here: cmp+sbb+or vs. test+set+lea? Also, sbb+not vs. setae+neg shows up again. 7. select_const.ll - These are motivating cases for the enhancement; replace cmov with cheaper ops. Assembly differences between movzbl and xor to avoid a partial reg stall are caused later by the X86 Fixup SetCC pass. Differential Revision: https://reviews.llvm.org/D35340 llvm-svn: 310717
2017-08-11 23:44:14 +08:00
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: xorl %eax, %eax
[x86] use more shift or LEA for select-of-constants (2nd try) The previous rev (r310208) failed to account for overflow when subtracting the constants to see if they're suitable for shift/lea. This version add a check for that and more test were added in r310490. We can convert any select-of-constants to math ops: http://rise4fun.com/Alive/d7d For this patch, I'm enhancing an existing x86 transform that uses fake multiplies (they always become shl/lea) to avoid cmov or branching. The current code misses cases where we have a negative constant and a positive constant, so this is just trying to plug that hole. The DAGCombiner diff prevents us from hitting a terrible inefficiency: we can start with a select in IR, create a select DAG node, convert it into a sext, convert it back into a select, and then lower it to sext machine code. Some notes about the test diffs: 1. 2010-08-04-MaskedSignedCompare.ll - We were creating control flow that didn't exist in the IR. 2. memcmp.ll - Choose -1 or 1 is the case that got me looking at this again. We could avoid the push/pop in some cases if we used 'movzbl %al' instead of an xor on a different reg? That's a post-DAG problem though. 3. mul-constant-result.ll - The trade-off between sbb+not vs. setne+neg could be addressed if that's a regression, but those would always be nearly equivalent. 4. pr22338.ll and sext-i1.ll - These tests have undef operands, so we don't actually care about these diffs. 5. sbb.ll - This shows a win for what is likely a common case: choose -1 or 0. 6. select.ll - There's another borderline case here: cmp+sbb+or vs. test+set+lea? Also, sbb+not vs. setae+neg shows up again. 7. select_const.ll - These are motivating cases for the enhancement; replace cmov with cheaper ops. Assembly differences between movzbl and xor to avoid a partial reg stall are caused later by the X86 Fixup SetCC pass. Differential Revision: https://reviews.llvm.org/D35340 llvm-svn: 310717
2017-08-11 23:44:14 +08:00
; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: sete %al
; X32-NEXT: negl %eax
; X32-NEXT: retl
;
; X64-LABEL: t4:
; X64: # BB#0:
; X64-NEXT: cmpq $1, %rdi
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: retq
%t0 = icmp eq i64 %x, 0
%t1 = sext i1 %t0 to i32
ret i32 %t1
}
define i64 @t5(i32 %x) nounwind readnone ssp {
; X32-LABEL: t5:
; X32: # BB#0:
; X32-NEXT: cmpl $1, {{[0-9]+}}(%esp)
; X32-NEXT: sbbl %eax, %eax
; X32-NEXT: movl %eax, %edx
; X32-NEXT: retl
;
; X64-LABEL: t5:
; X64: # BB#0:
; X64-NEXT: cmpl $1, %edi
; X64-NEXT: sbbq %rax, %rax
; X64-NEXT: retq
%t0 = icmp eq i32 %x, 0
%t1 = sext i1 %t0 to i64
ret i64 %t1
}
; sext (xor Bool, -1) --> sub (zext Bool), 1
define i32 @select_0_or_1s(i1 %cond) {
; X32-LABEL: select_0_or_1s:
; X32: # BB#0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: andl $1, %eax
; X32-NEXT: decl %eax
; X32-NEXT: retl
;
; X64-LABEL: select_0_or_1s:
; X64: # BB#0:
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: andl $1, %edi
; X64-NEXT: leal -1(%rdi), %eax
; X64-NEXT: retq
%not = xor i1 %cond, 1
%sext = sext i1 %not to i32
ret i32 %sext
}
; sext (xor Bool, -1) --> sub (zext Bool), 1
define i32 @select_0_or_1s_zeroext(i1 zeroext %cond) {
; X32-LABEL: select_0_or_1s_zeroext:
; X32: # BB#0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: decl %eax
; X32-NEXT: retl
;
; X64-LABEL: select_0_or_1s_zeroext:
; X64: # BB#0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: decl %eax
; X64-NEXT: retq
%not = xor i1 %cond, 1
%sext = sext i1 %not to i32
ret i32 %sext
}
; sext (xor Bool, -1) --> sub (zext Bool), 1
define i32 @select_0_or_1s_signext(i1 signext %cond) {
; X32-LABEL: select_0_or_1s_signext:
; X32: # BB#0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $1, %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: decl %eax
; X32-NEXT: retl
;
; X64-LABEL: select_0_or_1s_signext:
; X64: # BB#0:
; X64-NEXT: andb $1, %dil
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: decl %eax
; X64-NEXT: retq
%not = xor i1 %cond, 1
%sext = sext i1 %not to i32
ret i32 %sext
}