; llvm-project/llvm/test/CodeGen/X86/atomic-load-store-wide.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=corei7 -mtriple=i686-- -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=NOSSE
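
; The first RUN line uses -mcpu=corei7, which implies SSE4.2 (SSE42 prefix);
; the second uses the bare i686 target, which has no SSE at all (NOSSE prefix).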
; 64-bit atomic load/store on x86-32
; FIXME: The generated code can be substantially improved.
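;
; A seq_cst i64 store has no single 32-bit integer instruction, so the
; backend either stores through an SSE register and fences with
; `lock orl $0, (%esp)` (SSE42), or spins on `lock cmpxchg8b` until the
; exchange succeeds (NOSSE).
;
; Roughly equivalent C, as an illustration only (the function name is made
; up; __atomic_store_n is the standard GCC/Clang builtin):
;
;   #include <stdint.h>
;   void store64_seq_cst(int64_t *ptr, int64_t val) {  /* hypothetical name */
;     __atomic_store_n(ptr, val, __ATOMIC_SEQ_CST);
;   }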
define void @test1(i64* %ptr, i64 %val1) {
; SSE42-LABEL: test1:
; SSE42: # %bb.0:
; SSE42-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE42-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE42-NEXT: movlps %xmm0, (%eax)
; SSE42-NEXT: lock orl $0, (%esp)
; SSE42-NEXT: retl
;
; NOSSE-LABEL: test1:
; NOSSE: # %bb.0:
; NOSSE-NEXT: pushl %ebx
; NOSSE-NEXT: .cfi_def_cfa_offset 8
; NOSSE-NEXT: pushl %esi
; NOSSE-NEXT: .cfi_def_cfa_offset 12
; NOSSE-NEXT: .cfi_offset %esi, -12
; NOSSE-NEXT: .cfi_offset %ebx, -8
; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; NOSSE-NEXT: movl (%esi), %eax
; NOSSE-NEXT: movl 4(%esi), %edx
; NOSSE-NEXT: .p2align 4, 0x90
; NOSSE-NEXT: .LBB0_1: # %atomicrmw.start
; NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
; NOSSE-NEXT: lock cmpxchg8b (%esi)
; NOSSE-NEXT: jne .LBB0_1
; NOSSE-NEXT: # %bb.2: # %atomicrmw.end
; NOSSE-NEXT: popl %esi
; NOSSE-NEXT: .cfi_def_cfa_offset 8
; NOSSE-NEXT: popl %ebx
; NOSSE-NEXT: .cfi_def_cfa_offset 4
; NOSSE-NEXT: retl
  store atomic i64 %val1, i64* %ptr seq_cst, align 8
  ret void
}
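
; A seq_cst i64 load is a single 8-byte SSE load (movq) when SSE is
; available; without SSE, the x87 fildll/fistpll pair bounces the value
; through an 8-byte-aligned stack slot, which is why the frame is realigned
; with `andl $-8, %esp`.
;
; Roughly equivalent C (illustrative sketch; the name load64_seq_cst is
; made up):
;
;   int64_t load64_seq_cst(const int64_t *ptr) {
;     return __atomic_load_n(ptr, __ATOMIC_SEQ_CST);
;   }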
define i64 @test2(i64* %ptr) {
; SSE42-LABEL: test2:
; SSE42: # %bb.0:
; SSE42-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE42-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE42-NEXT: movd %xmm0, %eax
; SSE42-NEXT: pextrd $1, %xmm0, %edx
; SSE42-NEXT: retl
;
; NOSSE-LABEL: test2:
; NOSSE: # %bb.0:
; NOSSE-NEXT: pushl %ebp
; NOSSE-NEXT: .cfi_def_cfa_offset 8
; NOSSE-NEXT: .cfi_offset %ebp, -8
; NOSSE-NEXT: movl %esp, %ebp
; NOSSE-NEXT: .cfi_def_cfa_register %ebp
; NOSSE-NEXT: andl $-8, %esp
; NOSSE-NEXT: subl $8, %esp
; NOSSE-NEXT: movl 8(%ebp), %eax
; NOSSE-NEXT: fildll (%eax)
; NOSSE-NEXT: fistpll (%esp)
; NOSSE-NEXT: movl (%esp), %eax
; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; NOSSE-NEXT: movl %ebp, %esp
; NOSSE-NEXT: popl %ebp
; NOSSE-NEXT: .cfi_def_cfa %esp, 4
; NOSSE-NEXT: retl
  %val = load atomic i64, i64* %ptr seq_cst, align 8
  ret i64 %val
}

; Same as test2, but with noimplicitfloat.
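; With SSE and x87 both off the table (e.g. under clang's -mno-implicit-float,
; as used in kernel code), the load is emulated with `lock cmpxchg8b` on
; zeroed registers: if the memory holds 0, the instruction stores the same 0
; back; otherwise the compare fails and the current value is loaded into
; %edx:%eax. Either way %edx:%eax ends up with the loaded value, at the cost
; of a locked access to memory that must be writable.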
define i64 @test3(i64* %ptr) noimplicitfloat {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: .cfi_offset %esi, -12
; CHECK-NEXT: .cfi_offset %ebx, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: lock cmpxchg8b (%esi)
; CHECK-NEXT: popl %esi
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl
  %val = load atomic i64, i64* %ptr seq_cst, align 8
  ret i64 %val
}
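
; Same as test2 again, but through a volatile atomic load. Volatility only
; forbids folding or duplicating the access; the lowering is unchanged
; because the movq and fildll/fistpll sequences already perform exactly one
; real load.
;
; Roughly equivalent C (illustrative; __atomic_load_n accepts
; volatile-qualified pointers):
;
;   int64_t load64_volatile(volatile int64_t *ptr) {
;     return __atomic_load_n(ptr, __ATOMIC_SEQ_CST);
;   }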
define i64 @test4(i64* %ptr) {
; SSE42-LABEL: test4:
; SSE42: # %bb.0:
; SSE42-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE42-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE42-NEXT: movd %xmm0, %eax
; SSE42-NEXT: pextrd $1, %xmm0, %edx
; SSE42-NEXT: retl
;
; NOSSE-LABEL: test4:
; NOSSE: # %bb.0:
; NOSSE-NEXT: pushl %ebp
; NOSSE-NEXT: .cfi_def_cfa_offset 8
; NOSSE-NEXT: .cfi_offset %ebp, -8
; NOSSE-NEXT: movl %esp, %ebp
; NOSSE-NEXT: .cfi_def_cfa_register %ebp
; NOSSE-NEXT: andl $-8, %esp
; NOSSE-NEXT: subl $8, %esp
; NOSSE-NEXT: movl 8(%ebp), %eax
; NOSSE-NEXT: fildll (%eax)
; NOSSE-NEXT: fistpll (%esp)
; NOSSE-NEXT: movl (%esp), %eax
; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; NOSSE-NEXT: movl %ebp, %esp
; NOSSE-NEXT: popl %ebp
; NOSSE-NEXT: .cfi_def_cfa %esp, 4
; NOSSE-NEXT: retl
  %val = load atomic volatile i64, i64* %ptr seq_cst, align 8
  ret i64 %val
}