; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
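
; These tests exercise reuse of the EFLAGS set by locked atomic add/sub: when
; the result of an atomicrmw add/sub is only used by a comparison against
; zero, the comparison should reuse the flags from a lock inc/dec/add/sub
; instead of falling back to LOCK XADD.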

define i32 @test_add_1_cmov_slt(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_add_1_cmov_slt:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    lock incq (%rdi)
; CHECK-NEXT:    cmovgl %edx, %esi
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst
  %tmp1 = icmp slt i64 %tmp0, 0
  %tmp2 = select i1 %tmp1, i32 %a0, i32 %a1
  ret i32 %tmp2
}

define i32 @test_add_1_cmov_sge(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_add_1_cmov_sge:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    lock incq (%rdi)
; CHECK-NEXT:    cmovlel %edx, %esi
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst
  %tmp1 = icmp sge i64 %tmp0, 0
  %tmp2 = select i1 %tmp1, i32 %a0, i32 %a1
  ret i32 %tmp2
}

define i32 @test_sub_1_cmov_sle(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_sub_1_cmov_sle:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    lock decq (%rdi)
; CHECK-NEXT:    cmovgel %edx, %esi
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
  %tmp1 = icmp sle i64 %tmp0, 0
  %tmp2 = select i1 %tmp1, i32 %a0, i32 %a1
  ret i32 %tmp2
}

define i32 @test_sub_1_cmov_sgt(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_sub_1_cmov_sgt:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    lock decq (%rdi)
; CHECK-NEXT:    cmovll %edx, %esi
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
  %tmp1 = icmp sgt i64 %tmp0, 0
  %tmp2 = select i1 %tmp1, i32 %a0, i32 %a1
  ret i32 %tmp2
}

; FIXME: (setcc slt x, 0) gets combined into shr early.
define i8 @test_add_1_setcc_slt(i64* %p) #0 {
; CHECK-LABEL: test_add_1_setcc_slt:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movl $1, %eax
; CHECK-NEXT:    lock xaddq %rax, (%rdi)
; CHECK-NEXT:    shrq $63, %rax
; CHECK-NEXT:    # kill: %AL<def> %AL<kill> %RAX<kill>
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst
  %tmp1 = icmp slt i64 %tmp0, 0
  %tmp2 = zext i1 %tmp1 to i8
  ret i8 %tmp2
}

define i8 @test_sub_1_setcc_sgt(i64* %p) #0 {
; CHECK-LABEL: test_sub_1_setcc_sgt:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    lock decq (%rdi)
; CHECK-NEXT:    setge %al
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
  %tmp1 = icmp sgt i64 %tmp0, 0
  %tmp2 = zext i1 %tmp1 to i8
  ret i8 %tmp2
}

define i32 @test_add_1_brcond_sge(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_add_1_brcond_sge:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    lock incq (%rdi)
; CHECK-NEXT:    jle .LBB6_2
; CHECK-NEXT:  # BB#1: # %t
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
; CHECK-NEXT:  .LBB6_2: # %f
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst
  %tmp1 = icmp sge i64 %tmp0, 0
  br i1 %tmp1, label %t, label %f
t:
  ret i32 %a0
f:
  ret i32 %a1
}

; Also make sure we don't muck with condition codes that we should ignore.
; No need to test unsigned comparisons, as they should all be simplified.

define i32 @test_add_1_cmov_sle(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_add_1_cmov_sle:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movl $1, %eax
; CHECK-NEXT:    lock xaddq %rax, (%rdi)
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovgl %edx, %esi
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst
  %tmp1 = icmp sle i64 %tmp0, 0
  %tmp2 = select i1 %tmp1, i32 %a0, i32 %a1
  ret i32 %tmp2
}

define i32 @test_add_1_cmov_sgt(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_add_1_cmov_sgt:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movl $1, %eax
; CHECK-NEXT:    lock xaddq %rax, (%rdi)
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    cmovlel %edx, %esi
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst
  %tmp1 = icmp sgt i64 %tmp0, 0
  %tmp2 = select i1 %tmp1, i32 %a0, i32 %a1
  ret i32 %tmp2
}

; Test a result being used by more than just the comparison.

define i8 @test_add_1_setcc_sgt_reuse(i64* %p, i64* %p2) #0 {
; CHECK-LABEL: test_add_1_setcc_sgt_reuse:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movl $1, %ecx
; CHECK-NEXT:    lock xaddq %rcx, (%rdi)
; CHECK-NEXT:    testq %rcx, %rcx
; CHECK-NEXT:    setg %al
; CHECK-NEXT:    movq %rcx, (%rsi)
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw add i64* %p, i64 1 seq_cst
  %tmp1 = icmp sgt i64 %tmp0, 0
  %tmp2 = zext i1 %tmp1 to i8
  store i64 %tmp0, i64* %p2
  ret i8 %tmp2
}

define i8 @test_sub_2_setcc_sgt(i64* %p) #0 {
; CHECK-LABEL: test_sub_2_setcc_sgt:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq $-2, %rax
; CHECK-NEXT:    lock xaddq %rax, (%rdi)
; CHECK-NEXT:    testq %rax, %rax
; CHECK-NEXT:    setg %al
; CHECK-NEXT:    retq
entry:
  %tmp0 = atomicrmw sub i64* %p, i64 2 seq_cst
  %tmp1 = icmp sgt i64 %tmp0, 0
  %tmp2 = zext i1 %tmp1 to i8
  ret i8 %tmp2
}

; The compare below has two users (both selects). Reusing EFLAGS here
; (combineSetCCAtomicArith) must be avoided when the compare has multiple
; uses, or the second user would be left comparing garbage.
define i8 @test_add_1_cmov_cmov(i64* %p, i8* %q) #0 {
; TODO: It's possible to use "lock inc" here, but both cmovs need to be updated.
; CHECK-LABEL: test_add_1_cmov_cmov:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movl $1, %eax
; CHECK-NEXT:    lock xaddq %rax, (%rdi)
; CHECK-NEXT:    testq %rax, %rax
entry:
  %add = atomicrmw add i64* %p, i64 1 seq_cst
  %cmp = icmp slt i64 %add, 0
  %s1 = select i1 %cmp, i8 12, i8 34
  store i8 %s1, i8* %q
  %s2 = select i1 %cmp, i8 56, i8 78
  ret i8 %s2
}

attributes #0 = { nounwind }