; llvm-project/llvm/test/CodeGen/X86/combineIncDecVector-crash.ll
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s
; This used to crash, just ensure that it doesn't.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
target triple = "x86_64-unknown-linux-gnu"
; Crash reproducer: a GC statepoint call followed by an unordered atomic
; float load/store pair bracketing a <4 x i32> add of splat(1).  The
; combineIncDecVector DAG combine rewrites the vector add as a psubd of
; all-ones (see the CHECK lines below); the test only needs to not crash.
define void @TestvMeth(i32 %0, i64 %1) gc "statepoint-example" !prof !1 {
; CHECK-LABEL: TestvMeth:
; CHECK: # %bb.0: # %bci_0
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movl $10, %esi
; CHECK-NEXT: movl $10, %edx
; CHECK-NEXT: movl $400, %ecx # imm = 0x190
; CHECK-NEXT: callq newarray
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: addss (%rax), %xmm0
; CHECK-NEXT: movdqu (%rax), %xmm1
; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
; CHECK-NEXT: psubd %xmm2, %xmm1
; CHECK-NEXT: movdqu %xmm1, (%rax)
; CHECK-NEXT: movss %xmm0, (%rax)
bci_0:
  ; Statepoint call to @newarray carrying a large deopt-state operand list.
  %token418 = call token (i64, i32, i8 * (i64, i32, i32, i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i8i64i32i32i32f(i64 2882400000, i32 0, i8 * (i64, i32, i32, i32)* nonnull @newarray, i32 4, i32 0, i64 undef, i32 10, i32 10, i32 400, i32 0, i32 35, i32 0, i32 1, i32 0, i32 43, i32 1, i32 13, i32 0, i32 3, i32 400, i32 3, i32 %0, i32 4, i64 %1, i32 7, i8* null, i32 3, i32 -11464, i32 7, i8* null, i32 3, i32 -243, i32 3, i32 14, i32 3, i32 117, i32 3, i32 -13, i32 3, i32 -15, i32 3, i32 -210, i32 3, i32 541, i32 7, i8* null)
  ; Unordered atomic float load, vector increment-by-one, atomic float store:
  ; the mix of atomic scalar memory ops with the combined vector add is the
  ; pattern that originally crashed the backend.
  %v2 = load atomic float, float * undef unordered, align 4
  %v3 = load <4 x i32>, <4 x i32> * undef, align 4
  %v4 = add <4 x i32> %v3, <i32 1, i32 1, i32 1, i32 1>
  store <4 x i32> %v4, <4 x i32> * undef, align 4
  %v5 = fadd float %v2, 1.500000e+01
  store atomic float %v5, float * undef unordered, align 4
  unreachable
}
; External routines referenced by the test body (only @newarray is called;
; bodies are irrelevant since the test just checks codegen doesn't crash).
declare i32* @personality_function()
declare i8 * @newarray(i64, i32, i32, i32)
; Statepoint intrinsic signature matching the call in @TestvMeth.
declare token @llvm.experimental.gc.statepoint.p0f_p1i8i64i32i32i32f(i64
immarg, i32 immarg, i8 * (i64, i32, i32, i32)*, i32 immarg, i32 immarg, ...)
; Profile metadata consumed via the !prof attachment on @TestvMeth.
!1 = !{!"function_entry_count", i64 32768}