; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 | FileCheck %s

; Tests for 128-bit atomics on x86-64, which lower to lock cmpxchg16b
; (requires -mattr=cx16). cmpxchg16b implicitly uses RBX, a callee-saved
; register, so each function must save/restore it.

@var = global i128 0

; Due to the scheduling right after isel for cmpxchg and given the
; machine scheduler and copy coalescer do not mess up with physical
; register live-ranges, we end up with a useless copy.
; Plain i128 cmpxchg: the success value is returned in RDX:RAX.
define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
; CHECK-LABEL: val_compare_and_swap:
; CHECK: ## BB#0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: Lcfi0:
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: Lcfi1:
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rcx, %r9
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: movq %r9, %rbx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
  %val = extractvalue { i128, i1 } %pair, 0
  ret i128 %val
}
; atomicrmw nand has no single instruction; it expands to a cmpxchg16b loop.
define void @fetch_and_nand(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_nand:
; CHECK: ## BB#0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: Lcfi2:
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: Lcfi3:
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB1_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: andq %r8, %rcx
; CHECK-NEXT: movq %rax, %rbx
; CHECK-NEXT: andq %rsi, %rbx
; CHECK-NEXT: notq %rbx
; CHECK-NEXT: notq %rcx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB1_1
; CHECK-NEXT: ## BB#2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, {{.*}}(%rip)
; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %val = atomicrmw nand i128* %p, i128 %bits release
  store i128 %val, i128* @var, align 16
  ret void
}
; atomicrmw or on i128 expands to a cmpxchg16b loop.
define void @fetch_and_or(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_or:
; CHECK: ## BB#0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: Lcfi4:
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: Lcfi5:
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB2_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movq %rax, %rbx
; CHECK-NEXT: orq %rsi, %rbx
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: orq %r8, %rcx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB2_1
; CHECK-NEXT: ## BB#2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, {{.*}}(%rip)
; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %val = atomicrmw or i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}
; atomicrmw add on i128: addq/adcq pair inside the cmpxchg16b loop.
define void @fetch_and_add(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_add:
; CHECK: ## BB#0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: Lcfi6:
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: Lcfi7:
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB3_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movq %rax, %rbx
; CHECK-NEXT: addq %rsi, %rbx
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: adcq %r8, %rcx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB3_1
; CHECK-NEXT: ## BB#2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, {{.*}}(%rip)
; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %val = atomicrmw add i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}
; atomicrmw sub on i128: subq/sbbq pair inside the cmpxchg16b loop.
define void @fetch_and_sub(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_sub:
; CHECK: ## BB#0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: Lcfi8:
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: Lcfi9:
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB4_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movq %rax, %rbx
; CHECK-NEXT: subq %rsi, %rbx
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: sbbq %r8, %rcx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB4_1
; CHECK-NEXT: ## BB#2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, {{.*}}(%rip)
; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %val = atomicrmw sub i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}
; atomicrmw min (signed): 128-bit compare via cmpq/sbbq, selects via cmovgeq.
define void @fetch_and_min(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_min:
; CHECK: ## BB#0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: Lcfi10:
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: Lcfi11:
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB5_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpq %rax, %rsi
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: sbbq %rdx, %rcx
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: cmovgeq %rdx, %rcx
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: cmovgeq %rax, %rbx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB5_1
; CHECK-NEXT: ## BB#2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, {{.*}}(%rip)
; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %val = atomicrmw min i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}
; atomicrmw max (signed): compare direction is flipped relative to min.
define void @fetch_and_max(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_max:
; CHECK: ## BB#0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: Lcfi12:
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: Lcfi13:
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB6_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpq %rsi, %rax
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: sbbq %r8, %rcx
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: cmovgeq %rdx, %rcx
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: cmovgeq %rax, %rbx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB6_1
; CHECK-NEXT: ## BB#2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, {{.*}}(%rip)
; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %val = atomicrmw max i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}
; atomicrmw umin (unsigned): same shape as min but selects via cmovaeq.
define void @fetch_and_umin(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_umin:
; CHECK: ## BB#0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: Lcfi14:
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: Lcfi15:
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB7_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpq %rax, %rsi
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: sbbq %rdx, %rcx
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: cmovaeq %rdx, %rcx
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: cmovaeq %rax, %rbx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB7_1
; CHECK-NEXT: ## BB#2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, {{.*}}(%rip)
; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %val = atomicrmw umin i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}
; atomicrmw umax (unsigned): selects via cmovbq.
define void @fetch_and_umax(i128* %p, i128 %bits) {
; CHECK-LABEL: fetch_and_umax:
; CHECK: ## BB#0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: Lcfi16:
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: Lcfi17:
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB8_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpq %rax, %rsi
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: sbbq %rdx, %rcx
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: cmovbq %rdx, %rcx
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: cmovbq %rax, %rbx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB8_1
; CHECK-NEXT: ## BB#2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, {{.*}}(%rip)
; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %val = atomicrmw umax i128* %p, i128 %bits seq_cst
  store i128 %val, i128* @var, align 16
  ret void
}
; i128 atomic load lowers to cmpxchg16b with zeroed expected/new values
; (a no-op exchange that returns the current memory contents).
define i128 @atomic_load_seq_cst(i128* %p) {
; CHECK-LABEL: atomic_load_seq_cst:
; CHECK: ## BB#0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: Lcfi18:
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: Lcfi19:
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %r = load atomic i128, i128* %p seq_cst, align 16
  ret i128 %r
}
; Monotonic (relaxed) i128 load: same cmpxchg16b lowering as seq_cst.
define i128 @atomic_load_relaxed(i128* %p) {
; CHECK-LABEL: atomic_load_relaxed:
; CHECK: ## BB#0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: Lcfi20:
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: Lcfi21:
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  %r = load atomic i128, i128* %p monotonic, align 16
  ret i128 %r
}
; i128 atomic store lowers to a cmpxchg16b loop that retries until the
; observed value matches, effecting the store.
define void @atomic_store_seq_cst(i128* %p, i128 %in) {
; CHECK-LABEL: atomic_store_seq_cst:
; CHECK: ## BB#0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: Lcfi22:
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: Lcfi23:
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB11_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB11_1
; CHECK-NEXT: ## BB#2: ## %atomicrmw.end
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  store atomic i128 %in, i128* %p seq_cst, align 16
  ret void
}
; Release-ordered i128 store: same cmpxchg16b loop lowering.
define void @atomic_store_release(i128* %p, i128 %in) {
; CHECK-LABEL: atomic_store_release:
; CHECK: ## BB#0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: Lcfi24:
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: Lcfi25:
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB12_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB12_1
; CHECK-NEXT: ## BB#2: ## %atomicrmw.end
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  store atomic i128 %in, i128* %p release, align 16
  ret void
}
; Unordered i128 store: still requires the cmpxchg16b loop for atomicity.
define void @atomic_store_relaxed(i128* %p, i128 %in) {
; CHECK-LABEL: atomic_store_relaxed:
; CHECK: ## BB#0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: Lcfi26:
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: Lcfi27:
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB13_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB13_1
; CHECK-NEXT: ## BB#2: ## %atomicrmw.end
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
  store atomic i128 %in, i128* %p unordered, align 16
  ret void
}