; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
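
; This file checks that we reuse EFLAGS from LOCKed add/sub of 1 when the
; only user of the atomicrmw result is a signed comparison against zero
; (consumed by cmov, setcc, or brcond). The combine rewrites the comparison
; on the fetched value in terms of the updated value, whose flags the LOCKed
; instruction sets:
;   (icmp slt x, 0) -> (icmp sle (add x, 1), 0)
;   (icmp sge x, 0) -> (icmp sgt (add x, 1), 0)
;   (icmp sle x, 0) -> (icmp slt (sub x, 1), 0)
;   (icmp sgt x, 0) -> (icmp sge (sub x, 1), 0)
; Where the combine does not apply, codegen falls back to LXADD (lock xaddq)
; plus an explicit test.
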
define i32 @test_add_1_cmov_slt(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_add_1_cmov_slt:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    lock incq (%rdi)
; CHECK-NEXT:    cmovgl %edx, %esi
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst
  %tmp1 = icmp slt i64 %tmp0, 0
  %tmp2 = select i1 %tmp1, i32 %a0, i32 %a1
  ret i32 %tmp2
}

define i32 @test_add_1_cmov_sge(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_add_1_cmov_sge:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    lock incq (%rdi)
; CHECK-NEXT:    cmovlel %edx, %esi
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst
  %tmp1 = icmp sge i64 %tmp0, 0
  %tmp2 = select i1 %tmp1, i32 %a0, i32 %a1
  ret i32 %tmp2
}

define i32 @test_sub_1_cmov_sle(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_sub_1_cmov_sle:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    lock decq (%rdi)
; CHECK-NEXT:    cmovgel %edx, %esi
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
  %tmp1 = icmp sle i64 %tmp0, 0
  %tmp2 = select i1 %tmp1, i32 %a0, i32 %a1
  ret i32 %tmp2
}

define i32 @test_sub_1_cmov_sgt(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_sub_1_cmov_sgt:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    lock decq (%rdi)
; CHECK-NEXT:    cmovll %edx, %esi
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
  %tmp1 = icmp sgt i64 %tmp0, 0
  %tmp2 = select i1 %tmp1, i32 %a0, i32 %a1
  ret i32 %tmp2
}

; FIXME: (setcc slt x, 0) gets combined into shr early.
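; For i64, (zext (icmp slt x, 0)) is just the sign bit, so DAG combine turns
; it into a shift before the EFLAGS-reuse combine can fire, leaving LXADD
; plus shrq $63 instead of a lock incq whose flags feed a setcc.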
define i8 @test_add_1_setcc_slt(i64* %p) #0 {
; CHECK-LABEL: test_add_1_setcc_slt:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movl $1, %eax
; CHECK-NEXT:    lock xaddq %rax, (%rdi)
; CHECK-NEXT:    shrq $63, %rax
; CHECK-NEXT:    # kill: %AL<def> %AL<kill> %RAX<kill>
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst
  %tmp1 = icmp slt i64 %tmp0, 0
  %tmp2 = zext i1 %tmp1 to i8
  ret i8 %tmp2
}

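; setcc user: sgt on the fetched value becomes sge on the decremented value,
; so lock decq can feed setge directly.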
define i8 @test_sub_1_setcc_sgt(i64* %p) #0 {
; CHECK-LABEL: test_sub_1_setcc_sgt:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    lock decq (%rdi)
; CHECK-NEXT:    setge %al
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
  %tmp1 = icmp sgt i64 %tmp0, 0
  %tmp2 = zext i1 %tmp1 to i8
  ret i8 %tmp2
}

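; brcond user: sge on the fetched value becomes sgt on the incremented value,
; and the branch is taken on the inverted condition (jle).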
define i32 @test_add_1_brcond_sge(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_add_1_brcond_sge:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    lock incq (%rdi)
; CHECK-NEXT:    jle .LBB6_2
; CHECK-NEXT:  # BB#1: # %t
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
; CHECK-NEXT:  .LBB6_2: # %f
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst
  %tmp1 = icmp sge i64 %tmp0, 0
  br i1 %tmp1, label %t, label %f
t:
  ret i32 %a0
f:
  ret i32 %a1
}

; Also make sure we don't muck with condition codes that we should ignore.
; No need to test unsigned comparisons, as they should all be simplified.
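; For an add of 1, only slt/sge can be recovered from the flags of the
; increment; sle/sgt would need flags from a decrement, so the two tests
; below must keep the xadd + test sequence.
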
define i32 @test_add_1_cmov_sle(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_add_1_cmov_sle:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movl $1, %eax
; CHECK-NEXT:    lock xaddq %rax, (%rdi)
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovgl %edx, %esi
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst
  %tmp1 = icmp sle i64 %tmp0, 0
  %tmp2 = select i1 %tmp1, i32 %a0, i32 %a1
  ret i32 %tmp2
}

define i32 @test_add_1_cmov_sgt(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_add_1_cmov_sgt:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movl $1, %eax
; CHECK-NEXT:    lock xaddq %rax, (%rdi)
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovlel %edx, %esi
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst
  %tmp1 = icmp sgt i64 %tmp0, 0
  %tmp2 = select i1 %tmp1, i32 %a0, i32 %a1
  ret i32 %tmp2
}

; Test a result being used by more than just the comparison.
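; Since %tmp0 is also stored, the comparison is not the only user, so the
; old value must be materialized with LXADD rather than folded into flags.
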
define i8 @test_add_1_setcc_sgt_reuse(i64* %p, i64* %p2) #0 {
; CHECK-LABEL: test_add_1_setcc_sgt_reuse:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movl $1, %ecx
; CHECK-NEXT:    lock xaddq %rcx, (%rdi)
; CHECK-NEXT:    testq %rcx, %rcx
; CHECK-NEXT:    setg %al
; CHECK-NEXT:    movq %rcx, (%rsi)
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst
  %tmp1 = icmp sgt i64 %tmp0, 0
  %tmp2 = zext i1 %tmp1 to i8
  store i64 %tmp0, i64* %p2
  ret i8 %tmp2
}

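; The combine only applies to add/sub of exactly 1; subtracting 2 cannot be
; rephrased in terms of the decremented value's flags, so LXADD is used.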
define i8 @test_sub_2_setcc_sgt(i64* %p) #0 {
; CHECK-LABEL: test_sub_2_setcc_sgt:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq $-2, %rax
; CHECK-NEXT:    lock xaddq %rax, (%rdi)
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    setg %al
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw sub i64* %p, i64 2 seq_cst
  %tmp1 = icmp sgt i64 %tmp0, 0
  %tmp2 = zext i1 %tmp1 to i8
  ret i8 %tmp2
}

attributes #0 = { nounwind }