llvm-project/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1421 lines
46 KiB
LLVM
Raw Normal View History

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O2 -mtriple=x86_64-linux-android \
; RUN: -enable-legalize-types-checking \
; RUN: | FileCheck %s
; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu \
; RUN: -enable-legalize-types-checking \
; RUN: | FileCheck %s
; RUN: llc < %s -O2 -mtriple=i686-linux-gnu -mattr=+sse2 \
; RUN: -enable-legalize-types-checking \
; RUN: | FileCheck %s --check-prefix=X86
; Check all soft floating point library function calls.
define fp128 @add(fp128 %x, fp128 %y) nounwind strictfp {
; CHECK-LABEL: add:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __addtf3
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: add:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll __addtf3
; X86-NEXT: addl $44, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%add = call fp128 @llvm.experimental.constrained.fadd.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret fp128 %add
}
define fp128 @sub(fp128 %x, fp128 %y) nounwind strictfp {
; CHECK-LABEL: sub:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __subtf3
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: sub:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll __subtf3
; X86-NEXT: addl $44, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%sub = call fp128 @llvm.experimental.constrained.fsub.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret fp128 %sub
}
define fp128 @mul(fp128 %x, fp128 %y) nounwind strictfp {
; CHECK-LABEL: mul:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __multf3
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: mul:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll __multf3
; X86-NEXT: addl $44, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%mul = call fp128 @llvm.experimental.constrained.fmul.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret fp128 %mul
}
define fp128 @div(fp128 %x, fp128 %y) nounwind strictfp {
; CHECK-LABEL: div:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __divtf3
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: div:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll __divtf3
; X86-NEXT: addl $44, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%div = call fp128 @llvm.experimental.constrained.fdiv.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret fp128 %div
}
define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp {
; CHECK-LABEL: fma:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq fmal
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: fma:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll fmal
; X86-NEXT: addl $60, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%fma = call fp128 @llvm.experimental.constrained.fma.f128(fp128 %x, fp128 %y, fp128 %z, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret fp128 %fma
}
define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp {
; CHECK-LABEL: frem:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq fmodl
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: frem:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll fmodl
; X86-NEXT: addl $44, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%div = call fp128 @llvm.experimental.constrained.frem.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret fp128 %div
}
define fp128 @ceil(fp128 %x) nounwind strictfp {
; CHECK-LABEL: ceil:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq ceill
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: ceil:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll ceill
; X86-NEXT: addl $28, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%ceil = call fp128 @llvm.experimental.constrained.ceil.f128(fp128 %x, metadata !"fpexcept.strict") #0
ret fp128 %ceil
}
define fp128 @cos(fp128 %x) nounwind strictfp {
; CHECK-LABEL: cos:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq cosl
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: cos:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll cosl
; X86-NEXT: addl $28, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%cos = call fp128 @llvm.experimental.constrained.cos.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret fp128 %cos
}
define fp128 @exp(fp128 %x) nounwind strictfp {
; CHECK-LABEL: exp:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq expl
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: exp:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll expl
; X86-NEXT: addl $28, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%exp = call fp128 @llvm.experimental.constrained.exp.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret fp128 %exp
}
define fp128 @exp2(fp128 %x) nounwind strictfp {
; CHECK-LABEL: exp2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq exp2l
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: exp2:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll exp2l
; X86-NEXT: addl $28, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%exp2 = call fp128 @llvm.experimental.constrained.exp2.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret fp128 %exp2
}
define fp128 @floor(fp128 %x) nounwind strictfp {
; CHECK-LABEL: floor:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq floorl
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: floor:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll floorl
; X86-NEXT: addl $28, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%floor = call fp128 @llvm.experimental.constrained.floor.f128(fp128 %x, metadata !"fpexcept.strict") #0
ret fp128 %floor
}
define fp128 @log(fp128 %x) nounwind strictfp {
; CHECK-LABEL: log:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq logl
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: log:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll logl
; X86-NEXT: addl $28, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%log = call fp128 @llvm.experimental.constrained.log.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret fp128 %log
}
define fp128 @log10(fp128 %x) nounwind strictfp {
; CHECK-LABEL: log10:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq log10l
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: log10:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll log10l
; X86-NEXT: addl $28, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%log10 = call fp128 @llvm.experimental.constrained.log10.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret fp128 %log10
}
define fp128 @log2(fp128 %x) nounwind strictfp {
; CHECK-LABEL: log2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq log2l
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: log2:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll log2l
; X86-NEXT: addl $28, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%log2 = call fp128 @llvm.experimental.constrained.log2.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret fp128 %log2
}
define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp {
; CHECK-LABEL: maxnum:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq fmaxl
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: maxnum:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll fmaxl
; X86-NEXT: addl $44, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%maxnum = call fp128 @llvm.experimental.constrained.maxnum.f128(fp128 %x, fp128 %y, metadata !"fpexcept.strict") #0
ret fp128 %maxnum
}
define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp {
; CHECK-LABEL: minnum:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq fminl
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: minnum:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll fminl
; X86-NEXT: addl $44, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%minnum = call fp128 @llvm.experimental.constrained.minnum.f128(fp128 %x, fp128 %y, metadata !"fpexcept.strict") #0
ret fp128 %minnum
}
define fp128 @nearbyint(fp128 %x) nounwind strictfp {
; CHECK-LABEL: nearbyint:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq nearbyintl
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: nearbyint:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll nearbyintl
; X86-NEXT: addl $28, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%nearbyint = call fp128 @llvm.experimental.constrained.nearbyint.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret fp128 %nearbyint
}
define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp {
; CHECK-LABEL: pow:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq powl
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: pow:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll powl
; X86-NEXT: addl $44, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%pow = call fp128 @llvm.experimental.constrained.pow.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret fp128 %pow
}
define fp128 @powi(fp128 %x, i32 %y) nounwind strictfp {
; CHECK-LABEL: powi:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __powitf2
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: powi:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $8, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll __powitf2
; X86-NEXT: addl $28, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%powi = call fp128 @llvm.experimental.constrained.powi.f128(fp128 %x, i32 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret fp128 %powi
}
define fp128 @rint(fp128 %x) nounwind strictfp {
; CHECK-LABEL: rint:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq rintl
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: rint:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll rintl
; X86-NEXT: addl $28, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%rint = call fp128 @llvm.experimental.constrained.rint.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret fp128 %rint
}
define fp128 @round(fp128 %x) nounwind strictfp {
; CHECK-LABEL: round:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq roundl
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: round:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll roundl
; X86-NEXT: addl $28, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%round = call fp128 @llvm.experimental.constrained.round.f128(fp128 %x, metadata !"fpexcept.strict") #0
ret fp128 %round
}
define fp128 @sin(fp128 %x) nounwind strictfp {
; CHECK-LABEL: sin:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq sinl
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: sin:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll sinl
; X86-NEXT: addl $28, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%sin = call fp128 @llvm.experimental.constrained.sin.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret fp128 %sin
}
define fp128 @sqrt(fp128 %x) nounwind strictfp {
; CHECK-LABEL: sqrt:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq sqrtl
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: sqrt:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll sqrtl
; X86-NEXT: addl $28, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%sqrt = call fp128 @llvm.experimental.constrained.sqrt.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret fp128 %sqrt
}
define fp128 @trunc(fp128 %x) nounwind strictfp {
; CHECK-LABEL: trunc:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq truncl
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
;
; X86-LABEL: trunc:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $20, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll truncl
; X86-NEXT: addl $28, %esp
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, 8(%esi)
; X86-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-NEXT: movl %eax, (%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl $4
entry:
%trunc = call fp128 @llvm.experimental.constrained.trunc.f128(fp128 %x, metadata !"fpexcept.strict") #0
ret fp128 %trunc
}
define i32 @lrint(fp128 %x) nounwind strictfp {
; CHECK-LABEL: lrint:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq lrintl
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: retq
;
; X86-LABEL: lrint:
; X86: # %bb.0: # %entry
; X86-NEXT: subl $12, %esp
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll lrintl
; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%rint = call i32 @llvm.experimental.constrained.lrint.i32.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret i32 %rint
}
define i64 @llrint(fp128 %x) nounwind strictfp {
; CHECK-LABEL: llrint:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq llrintl
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: retq
;
; X86-LABEL: llrint:
; X86: # %bb.0: # %entry
; X86-NEXT: subl $12, %esp
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll llrintl
; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%rint = call i64 @llvm.experimental.constrained.llrint.i64.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret i64 %rint
}
define i32 @lround(fp128 %x) nounwind strictfp {
; CHECK-LABEL: lround:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq lroundl
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: retq
;
; X86-LABEL: lround:
; X86: # %bb.0: # %entry
; X86-NEXT: subl $12, %esp
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll lroundl
; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%round = call i32 @llvm.experimental.constrained.lround.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0
ret i32 %round
}
define i64 @llround(fp128 %x) nounwind strictfp {
; CHECK-LABEL: llround:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq llroundl
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: retq
;
; X86-LABEL: llround:
; X86: # %bb.0: # %entry
; X86-NEXT: subl $12, %esp
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll llroundl
; X86-NEXT: addl $28, %esp
; X86-NEXT: retl
entry:
%round = call i64 @llvm.experimental.constrained.llround.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0
ret i64 %round
}
define i64 @cmp(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
; CHECK-LABEL: cmp:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movq %rsi, %r14
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: callq __eqtf2
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: cmovneq %r14, %rbx
; CHECK-NEXT: movq %rbx, %rax
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r14
; CHECK-NEXT: retq
;
; X86-LABEL: cmp:
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll __eqtf2
; X86-NEXT: addl $32, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
%cond = call i1 @llvm.experimental.constrained.fcmp.f128(
fp128 %x, fp128 %y,
metadata !"oeq",
metadata !"fpexcept.strict") #0
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
}
define i64 @cmps(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
; CHECK-LABEL: cmps:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movq %rsi, %r14
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: callq __eqtf2
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: cmovneq %r14, %rbx
; CHECK-NEXT: movq %rbx, %rax
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r14
; CHECK-NEXT: retq
;
; X86-LABEL: cmps:
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll __eqtf2
; X86-NEXT: addl $32, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
%cond = call i1 @llvm.experimental.constrained.fcmps.f128(
fp128 %x, fp128 %y,
metadata !"oeq",
metadata !"fpexcept.strict") #0
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
}
define i64 @cmp_ueq_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
; CHECK-LABEL: cmp_ueq_q:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $32, %rsp
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movq %rsi, %r14
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: callq __eqtf2
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: sete %bpl
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: callq __unordtf2
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: setne %al
; CHECK-NEXT: orb %bpl, %al
; CHECK-NEXT: cmoveq %r14, %rbx
; CHECK-NEXT: movq %rbx, %rax
; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
;
; X86-LABEL: cmp_ueq_q:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %ebx
; X86-NEXT: movl %ebx, %esi
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll __eqtf2
; X86-NEXT: addl $32, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %bl
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll __unordtf2
; X86-NEXT: addl $32, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: orb %bl, %al
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovnel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: addl $12, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl
%cond = call i1 @llvm.experimental.constrained.fcmp.f128(
fp128 %x, fp128 %y,
metadata !"ueq",
metadata !"fpexcept.strict") #0
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
}
define i64 @cmp_one_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
; CHECK-LABEL: cmp_one_q:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $32, %rsp
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movq %rsi, %r14
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: callq __eqtf2
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: setne %bpl
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: callq __unordtf2
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: sete %al
; CHECK-NEXT: testb %bpl, %al
; CHECK-NEXT: cmoveq %r14, %rbx
; CHECK-NEXT: movq %rbx, %rax
; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
;
; X86-LABEL: cmp_one_q:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $12, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %ebx
; X86-NEXT: movl %ebx, %esi
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll __eqtf2
; X86-NEXT: addl $32, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %bl
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll __unordtf2
; X86-NEXT: addl $32, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: sete %al
; X86-NEXT: testb %bl, %al
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovnel %eax, %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl 4(%ecx), %edx
; X86-NEXT: addl $12, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl
%cond = call i1 @llvm.experimental.constrained.fcmp.f128(
fp128 %x, fp128 %y,
metadata !"one",
metadata !"fpexcept.strict") #0
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
}
attributes #0 = { nounwind strictfp }
declare fp128 @llvm.experimental.constrained.fadd.f128(fp128, fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.fsub.f128(fp128, fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.fmul.f128(fp128, fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.fdiv.f128(fp128, fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.fma.f128(fp128, fp128, fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.frem.f128(fp128, fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata)
declare fp128 @llvm.experimental.constrained.cos.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.exp.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.exp2.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata)
declare fp128 @llvm.experimental.constrained.log.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.log10.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.log2.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.maxnum.f128(fp128, fp128, metadata)
declare fp128 @llvm.experimental.constrained.minnum.f128(fp128, fp128, metadata)
declare fp128 @llvm.experimental.constrained.nearbyint.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.pow.f128(fp128, fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.powi.f128(fp128, i32, metadata, metadata)
declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata)
declare fp128 @llvm.experimental.constrained.sin.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata)
declare i32 @llvm.experimental.constrained.lrint.i32.f128(fp128, metadata, metadata)
declare i64 @llvm.experimental.constrained.llrint.i64.f128(fp128, metadata, metadata)
declare i32 @llvm.experimental.constrained.lround.i32.f128(fp128, metadata)
declare i64 @llvm.experimental.constrained.llround.i64.f128(fp128, metadata)
declare i1 @llvm.experimental.constrained.fcmp.f128(fp128, fp128, metadata, metadata)
declare i1 @llvm.experimental.constrained.fcmps.f128(fp128, fp128, metadata, metadata)