; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X32

; On x86, an atomic rmw operation that does not modify the value in memory
; (such as atomic add 0) can be replaced by an mfence followed by a mov.
; This is explained (with the motivation for such an optimization) in
; http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
; Idempotent atomicrmw: adding 0 leaves memory unchanged, so both targets
; lower this to a fence plus an ordinary byte load instead of a locked RMW.
define i8 @add8(i8* %p) {
; X64-LABEL: add8:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movb (%rdi), %al
; X64-NEXT: retq
;
; X32-LABEL: add8:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: mfence
; X32-NEXT: movb (%eax), %al
; X32-NEXT: retl
  %1 = atomicrmw add i8* %p, i8 0 monotonic
  ret i8 %1
}
; Idempotent atomicrmw: or with 0 is a no-op, so an acquire RMW becomes a
; fence plus a zero-extending 16-bit load on both targets.
define i16 @or16(i16* %p) {
; X64-LABEL: or16:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: retq
;
; X32-LABEL: or16:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: mfence
; X32-NEXT: movzwl (%eax), %eax
; X32-NEXT: retl
  %1 = atomicrmw or i16* %p, i16 0 acquire
  ret i16 %1
}
; Idempotent atomicrmw: xor with 0 is a no-op, so a release RMW becomes a
; fence plus a plain 32-bit load on both targets.
define i32 @xor32(i32* %p) {
; X64-LABEL: xor32:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: retq
;
; X32-LABEL: xor32:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: mfence
; X32-NEXT: movl (%eax), %eax
; X32-NEXT: retl
  %1 = atomicrmw xor i32* %p, i32 0 release
  ret i32 %1
}
; Idempotent atomicrmw: subtracting 0 is a no-op. On x86-64 this becomes a
; fence plus a 64-bit load; on 32-bit x86 a 64-bit atomic access still needs
; the cmpxchg8b loop, since no single 64-bit load is atomic there.
define i64 @sub64(i64* %p) {
; X64-LABEL: sub64:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: retq
;
; X32-LABEL: sub64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: pushl %esi
; X32-NEXT: .cfi_def_cfa_offset 12
; X32-NEXT: .cfi_offset %esi, -12
; X32-NEXT: .cfi_offset %ebx, -8
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl (%esi), %eax
; X32-NEXT: movl 4(%esi), %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB3_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: lock cmpxchg8b (%esi)
; X32-NEXT: jne .LBB3_1
; X32-NEXT: # %bb.2: # %atomicrmw.end
; X32-NEXT: popl %esi
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl
  %1 = atomicrmw sub i64* %p, i64 0 seq_cst
  ret i64 %1
}
; 128-bit atomics are not lowered inline on either target: both fall back to
; the libcall __sync_fetch_and_or_16, so no idempotent-RMW simplification
; applies here.
define i128 @or128(i128* %p) {
; X64-LABEL: or128:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: callq __sync_fetch_and_or_16
; X64-NEXT: popq %rcx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
; X32-LABEL: or128:
; X32: # %bb.0:
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $16, %esp
; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %edi, -12
; X32-NEXT: movl 8(%ebp), %esi
; X32-NEXT: movl %esp, %eax
; X32-NEXT: pushl $0
; X32-NEXT: pushl $0
; X32-NEXT: pushl $0
; X32-NEXT: pushl $0
; X32-NEXT: pushl 12(%ebp)
; X32-NEXT: pushl %eax
; X32-NEXT: calll __sync_fetch_and_or_16
; X32-NEXT: addl $20, %esp
; X32-NEXT: movl (%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: movl %edi, 8(%esi)
; X32-NEXT: movl %edx, 12(%esi)
; X32-NEXT: movl %eax, (%esi)
; X32-NEXT: movl %ecx, 4(%esi)
; X32-NEXT: movl %esi, %eax
; X32-NEXT: leal -8(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl $4
  %1 = atomicrmw or i128* %p, i128 0 monotonic
  ret i128 %1
}
; For 'and', the idempotent value is (-1)
; Idempotent atomicrmw: and with -1 (all ones) is a no-op, so an acq_rel RMW
; becomes a fence plus a plain 32-bit load on both targets.
define i32 @and32 (i32* %p) {
; X64-LABEL: and32:
; X64: # %bb.0:
; X64-NEXT: mfence
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: retq
;
; X32-LABEL: and32:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: mfence
; X32-NEXT: movl (%eax), %eax
; X32-NEXT: retl
  %1 = atomicrmw and i32* %p, i32 -1 acq_rel
  ret i32 %1
}