llvm-project/llvm/test/CodeGen/X86/fp-intrinsics.ll

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

2820 lines
90 KiB
LLVM
Raw Normal View History

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -mtriple=i686-pc-linux -mattr=+cmov < %s | FileCheck %s --check-prefix=COMMON --check-prefix=X87
; RUN: llc -O3 -mtriple=i686-pc-linux -mattr=sse2 < %s | FileCheck %s --check-prefix=COMMON --check-prefix=X86-SSE
; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s --check-prefix=COMMON --check-prefix=SSE
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx < %s | FileCheck %s --check-prefix=COMMON --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512f < %s | FileCheck %s --check-prefix=COMMON --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512dq < %s | FileCheck %s --check-prefix=COMMON --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512DQ
; Verify that constants aren't folded to inexact results when the rounding mode
; is unknown.
;
; double f1() {
; // Because 0.1 cannot be represented exactly, this shouldn't be folded.
; return 1.0/10.0;
; }
;
; The constrained fdiv below is tagged round.dynamic / fpexcept.strict. The
; checks for every run line expect a real divide instruction (fdivs, divsd or
; vdivsd) instead of a pre-computed constant.
;
define double @f1() #0 {
; X87-LABEL: f1:
; X87: # %bb.0: # %entry
; X87-NEXT: fld1
; X87-NEXT: fdivs {{\.LCPI.*}}
; X87-NEXT: wait
; X87-NEXT: retl
;
; X86-SSE-LABEL: f1:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: divsd {{\.LCPI.*}}, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f1:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: divsd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: f1:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vdivsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
entry:
; 1.0 / 10.0 with dynamic rounding and strict exception semantics.
%div = call double @llvm.experimental.constrained.fdiv.f64(
double 1.000000e+00,
double 1.000000e+01,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %div
}
; Verify that 'a - 0' isn't simplified to 'a' when the rounding mode is unknown.
;
; double f2(double a) {
; // Because the result of '0 - 0' is negative zero if rounding mode is
; // downward, this shouldn't be simplified.
; return a - 0;
; }
;
; The checks for every run line expect a zero to be materialized (fldz, xorpd
; or vxorpd) and then a real subtract to be emitted.
;
define double @f2(double %a) #0 {
; X87-LABEL: f2:
; X87: # %bb.0: # %entry
; X87-NEXT: fldz
; X87-NEXT: fsubrl {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: retl
;
; X86-SSE-LABEL: f2:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: xorpd %xmm1, %xmm1
; X86-SSE-NEXT: subsd %xmm1, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f2:
; SSE: # %bb.0: # %entry
; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: f2:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
; %a - (+0.0) with dynamic rounding and strict exception semantics.
%sub = call double @llvm.experimental.constrained.fsub.f64(
double %a,
double 0.000000e+00,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %sub
}
; Verify that '-((-a)*b)' isn't simplified to 'a*b' when the rounding mode is
; unknown.
;
; double f3(double a, double b) {
; // Because the intermediate value involved in this calculation may require
; // rounding, this shouldn't be simplified.
; return -((-a)*b);
; }
;
; Each negation is written in the IR as a constrained fsub from -0.0. The
; checks for every run line expect a subtract, a multiply and a second
; subtract, i.e. neither negation is cancelled.
;
define double @f3(double %a, double %b) #0 {
; X87-LABEL: f3:
; X87: # %bb.0: # %entry
; X87-NEXT: fldz
; X87-NEXT: fchs
; X87-NEXT: fld %st(0)
; X87-NEXT: fsubl {{[0-9]+}}(%esp)
; X87-NEXT: fmull {{[0-9]+}}(%esp)
; X87-NEXT: fsubrp %st, %st(1)
; X87-NEXT: wait
; X87-NEXT: retl
;
; X86-SSE-LABEL: f3:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movapd %xmm0, %xmm1
; X86-SSE-NEXT: subsd {{[0-9]+}}(%esp), %xmm1
; X86-SSE-NEXT: mulsd {{[0-9]+}}(%esp), %xmm1
; X86-SSE-NEXT: subsd %xmm1, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f3:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT: movapd %xmm2, %xmm3
; SSE-NEXT: subsd %xmm0, %xmm3
; SSE-NEXT: mulsd %xmm1, %xmm3
; SSE-NEXT: subsd %xmm3, %xmm2
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: f3:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; AVX-NEXT: vsubsd %xmm0, %xmm2, %xmm0
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vsubsd %xmm0, %xmm2, %xmm0
; AVX-NEXT: retq
entry:
; %sub = -0.0 - %a
%sub = call double @llvm.experimental.constrained.fsub.f64(
double -0.000000e+00, double %a,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
; %mul = %sub * %b
%mul = call double @llvm.experimental.constrained.fmul.f64(
double %sub, double %b,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
; %ret = -0.0 - %mul
%ret = call double @llvm.experimental.constrained.fsub.f64(
double -0.000000e+00,
double %mul,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %ret
}
; Verify that FP operations are not performed speculatively when FP exceptions
; are not being ignored.
;
; double f4(int n, double a) {
; // Because a + 1 may overflow, this should not be simplified.
; if (n > 0)
; return a + 1.0;
; return a;
; }
;
; The constrained fadd appears only in %if.then. The checks for every run line
; expect the add to stay after the conditional branch to .LBB3_2, so it is
; executed only when the branch is not taken.
;
define double @f4(i32 %n, double %a) #0 {
; X87-LABEL: f4:
; X87: # %bb.0: # %entry
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; X87-NEXT: jle .LBB3_2
; X87-NEXT: # %bb.1: # %if.then
; X87-NEXT: fld1
; X87-NEXT: faddp %st, %st(1)
; X87-NEXT: wait
; X87-NEXT: .LBB3_2: # %if.end
; X87-NEXT: retl
;
; X86-SSE-LABEL: f4:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: jle .LBB3_2
; X86-SSE-NEXT: # %bb.1: # %if.then
; X86-SSE-NEXT: addsd {{\.LCPI.*}}, %xmm0
; X86-SSE-NEXT: .LBB3_2: # %if.end
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f4:
; SSE: # %bb.0: # %entry
; SSE-NEXT: testl %edi, %edi
; SSE-NEXT: jle .LBB3_2
; SSE-NEXT: # %bb.1: # %if.then
; SSE-NEXT: addsd {{.*}}(%rip), %xmm0
; SSE-NEXT: .LBB3_2: # %if.end
; SSE-NEXT: retq
;
; AVX-LABEL: f4:
; AVX: # %bb.0: # %entry
; AVX-NEXT: testl %edi, %edi
; AVX-NEXT: jle .LBB3_2
; AVX-NEXT: # %bb.1: # %if.then
; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: .LBB3_2: # %if.end
; AVX-NEXT: retq
entry:
%cmp = icmp sgt i32 %n, 0
br i1 %cmp, label %if.then, label %if.end
if.then:
; Only reached when %n > 0.
%add = call double @llvm.experimental.constrained.fadd.f64(
double 1.000000e+00, double %a,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
br label %if.end
if.end:
; Selects the sum when coming from %if.then, otherwise the unmodified %a.
%a.0 = phi double [%add, %if.then], [ %a, %entry ]
ret double %a.0
}
; Verify that sqrt(42.0) isn't simplified when the rounding mode is unknown.
; The checks for every run line expect the constant to be loaded and a real
; square-root instruction (fsqrt, sqrtsd or vsqrtsd) to be emitted.
define double @f5() #0 {
; X87-LABEL: f5:
; X87: # %bb.0: # %entry
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: fsqrt
; X87-NEXT: wait
; X87-NEXT: retl
;
; X86-SSE-LABEL: f5:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: sqrtsd %xmm0, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f5:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: sqrtsd %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: f5:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.sqrt.f64(double 42.0,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; Verify that pow(42.1, 3.0) isn't simplified when the rounding mode is unknown.
; The checks for every run line expect both constant operands to be
; materialized and a call to pow to be emitted.
define double @f6() #0 {
; X87-LABEL: f6:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $28, %esp
; X87-NEXT: .cfi_def_cfa_offset 32
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
; X87-NEXT: wait
; X87-NEXT: calll pow
; X87-NEXT: addl $28, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f6:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $28, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 32
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: calll pow
; X86-SSE-NEXT: addl $28, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f6:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: callq pow
; SSE-NEXT: popq %rax
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f6:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq pow
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.pow.f64(double 42.1,
double 3.0,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; Verify that powi(42.1, 3) isn't simplified when the rounding mode is unknown.
; The checks for every run line expect a call to __powidf2 with the integer
; exponent 3 passed as an argument (movl $3).
define double @f7() #0 {
; X87-LABEL: f7:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: fldl {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
; X87-NEXT: wait
; X87-NEXT: movl $3, {{[0-9]+}}(%esp)
; X87-NEXT: calll __powidf2
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f7:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: movl $3, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: calll __powidf2
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f7:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movl $3, %edi
; SSE-NEXT: callq __powidf2
; SSE-NEXT: popq %rax
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f7:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: movl $3, %edi
; AVX-NEXT: callq __powidf2
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.powi.f64(double 42.1,
i32 3,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; Verify that sin(42.0) isn't simplified when the rounding mode is unknown.
; The checks for every run line expect the constant to be materialized and a
; call to sin to be emitted.
define double @f8() #0 {
; X87-LABEL: f8:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
; X87-NEXT: wait
; X87-NEXT: calll sin
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f8:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: calll sin
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f8:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: callq sin
; SSE-NEXT: popq %rax
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f8:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq sin
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.sin.f64(double 42.0,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; Verify that cos(42.0) isn't simplified when the rounding mode is unknown.
; The checks for every run line expect the constant to be materialized and a
; call to cos to be emitted.
define double @f9() #0 {
; X87-LABEL: f9:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
; X87-NEXT: wait
; X87-NEXT: calll cos
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f9:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: calll cos
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f9:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: callq cos
; SSE-NEXT: popq %rax
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f9:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq cos
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.cos.f64(double 42.0,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; Verify that exp(42.0) isn't simplified when the rounding mode is unknown.
; The checks for every run line expect the constant to be materialized and a
; call to exp to be emitted.
define double @f10() #0 {
; X87-LABEL: f10:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
; X87-NEXT: wait
; X87-NEXT: calll exp
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f10:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: calll exp
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f10:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: callq exp
; SSE-NEXT: popq %rax
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f10:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq exp
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.exp.f64(double 42.0,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; Verify that exp2(42.1) isn't simplified when the rounding mode is unknown.
; The checks for every run line expect the constant to be materialized and a
; call to exp2 to be emitted.
define double @f11() #0 {
; X87-LABEL: f11:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: fldl {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
; X87-NEXT: wait
; X87-NEXT: calll exp2
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f11:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: calll exp2
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f11:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: callq exp2
; SSE-NEXT: popq %rax
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f11:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq exp2
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.exp2.f64(double 42.1,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; Verify that log(42.0) isn't simplified when the rounding mode is unknown.
; The checks for every run line expect the constant to be materialized and a
; call to log to be emitted.
define double @f12() #0 {
; X87-LABEL: f12:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
; X87-NEXT: wait
; X87-NEXT: calll log
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f12:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: calll log
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f12:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: callq log
; SSE-NEXT: popq %rax
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f12:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq log
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.log.f64(double 42.0,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; Verify that log10(42.0) isn't simplified when the rounding mode is unknown.
; The checks for every run line expect the constant to be materialized and a
; call to log10 to be emitted.
define double @f13() #0 {
; X87-LABEL: f13:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
; X87-NEXT: wait
; X87-NEXT: calll log10
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f13:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: calll log10
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f13:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: callq log10
; SSE-NEXT: popq %rax
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f13:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq log10
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.log10.f64(double 42.0,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; Verify that log2(42.0) isn't simplified when the rounding mode is unknown.
; The checks for every run line expect the constant to be materialized and a
; call to log2 to be emitted.
define double @f14() #0 {
; X87-LABEL: f14:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
; X87-NEXT: wait
; X87-NEXT: calll log2
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f14:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: calll log2
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f14:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: callq log2
; SSE-NEXT: popq %rax
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f14:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq log2
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.log2.f64(double 42.0,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; Verify that rint(42.1) isn't simplified when the rounding mode is unknown.
; The x87, 32-bit SSE2 and 64-bit SSE checks expect a call to rint. The AVX
; checks instead expect an inline vroundsd with immediate $4.
define double @f15() #0 {
; X87-LABEL: f15:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: fldl {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
; X87-NEXT: wait
; X87-NEXT: calll rint
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f15:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: calll rint
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f15:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: callq rint
; SSE-NEXT: popq %rax
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f15:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.rint.f64(double 42.1,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; Verify that nearbyint(42.1) isn't simplified when the rounding mode is
; unknown.
; The x87, 32-bit SSE2 and 64-bit SSE checks expect a call to nearbyint. The
; AVX checks instead expect an inline vroundsd with immediate $12.
define double @f16() #0 {
; X87-LABEL: f16:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: fldl {{\.LCPI.*}}
; X87-NEXT: fstpl (%esp)
; X87-NEXT: wait
; X87-NEXT: calll nearbyint
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f16:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: calll nearbyint
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f16:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: callq nearbyint
; SSE-NEXT: popq %rax
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f16:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.nearbyint.f64(
double 42.1,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; Constrained frem of 1.0 by 10.0 with round.dynamic / fpexcept.strict.
; The checks for every run line expect both constant operands to be
; materialized and a call to fmod to be emitted.
define double @f19() #0 {
; X87-LABEL: f19:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $28, %esp
; X87-NEXT: .cfi_def_cfa_offset 32
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: movl $1072693248, {{[0-9]+}}(%esp) # imm = 0x3FF00000
; X87-NEXT: movl $0, (%esp)
; X87-NEXT: calll fmod
; X87-NEXT: addl $28, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f19:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $28, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 32
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: calll fmod
; X86-SSE-NEXT: addl $28, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f19:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: callq fmod
; SSE-NEXT: popq %rax
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f19:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: callq fmod
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
%rem = call double @llvm.experimental.constrained.frem.f64(
double 1.000000e+00,
double 1.000000e+01,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %rem
}
; Verify that fptosi(%x) isn't simplified when the rounding mode is
; unknown.
; Verify that no gross errors happen.
; FIXME: The SSE/AVX code does not raise an invalid exception for all values
; that don't fit in i8.
;
; Converts double to i8. The intrinsic call carries only the fpexcept.strict
; operand; no rounding-mode operand is passed.
; The x87 checks expect an fnstcw, an OR of the control word with 0xC00, an
; fldcw, the fistps store, and a second fldcw. The SSE and AVX checks expect a
; 32-bit cvttsd2si / vcvttsd2si with the result returned in %al.
define i8 @f20s8(double %x) #0 {
; X87-LABEL: f20s8:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 12
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: orl $3072, %eax # imm = 0xC00
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: fistps {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: movb {{[0-9]+}}(%esp), %al
; X87-NEXT: addl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f20s8:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: cvttsd2si {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: # kill: def $al killed $al killed $eax
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f20s8:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvttsd2si %xmm0, %eax
; SSE-NEXT: # kill: def $al killed $al killed $eax
; SSE-NEXT: retq
;
; AVX-LABEL: f20s8:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvttsd2si %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
entry:
%result = call i8 @llvm.experimental.constrained.fptosi.i8.f64(double %x,
metadata !"fpexcept.strict") #0
ret i8 %result
}
; Verify that fptosi(%x) isn't simplified when the rounding mode is
; unknown.
; Verify that no gross errors happen.
; FIXME: The SSE/AVX code does not raise an invalid exception for all values
; that don't fit in i16.
;
; Converts double to i16. The intrinsic call carries only the fpexcept.strict
; operand; no rounding-mode operand is passed.
; The x87 checks expect an fnstcw, an OR of the control word with 0xC00, an
; fldcw, the fistps store, and a second fldcw. The SSE and AVX checks expect a
; 32-bit cvttsd2si / vcvttsd2si with the result returned in %ax.
define i16 @f20s16(double %x) #0 {
; X87-LABEL: f20s16:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 12
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: orl $3072, %eax # imm = 0xC00
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: fistps {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: addl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f20s16:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: cvttsd2si {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f20s16:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvttsd2si %xmm0, %eax
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
; SSE-NEXT: retq
;
; AVX-LABEL: f20s16:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvttsd2si %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
entry:
%result = call i16 @llvm.experimental.constrained.fptosi.i16.f64(double %x,
metadata !"fpexcept.strict") #0
ret i16 %result
}
; Verify that fptosi(%x) isn't simplified when the rounding mode is
; unknown.
; Verify that no gross errors happen.
;
; Converts double to i32. The intrinsic call carries only the fpexcept.strict
; operand; no rounding-mode operand is passed.
; The x87 checks expect the control word to be saved to (%esp), a copy OR'ed
; with 0xC00 to be loaded for the fistpl store, and the saved word at (%esp)
; to be reloaded afterwards. The SSE and AVX checks expect a single
; cvttsd2si / vcvttsd2si into %eax.
define i32 @f20s(double %x) #0 {
; X87-LABEL: f20s:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 12
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: fnstcw (%esp)
; X87-NEXT: movzwl (%esp), %eax
; X87-NEXT: orl $3072, %eax # imm = 0xC00
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: fistpl {{[0-9]+}}(%esp)
; X87-NEXT: fldcw (%esp)
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: addl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f20s:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: cvttsd2si {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f20s:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvttsd2si %xmm0, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: f20s:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvttsd2si %xmm0, %eax
; AVX-NEXT: retq
entry:
%result = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x,
metadata !"fpexcept.strict") #0
ret i32 %result
}
; Verify that fptosi(%x) isn't simplified when the rounding mode is
; unknown.
; Verify that no gross errors happen.
; FIXME: This code generates spurious inexact exceptions.
;
; Converts double to i64 (signed; the call below is fptosi.i64). The intrinsic
; call carries only the fpexcept.strict operand; no rounding-mode operand is
; passed.
; Both 32-bit runs (x87 and SSE2) are expected to go through the x87 fistpll
; sequence with the control word OR'ed with 0xC00, returning the result in
; %eax and %edx. The 64-bit SSE and AVX checks expect a single
; cvttsd2si / vcvttsd2si into %rax.
define i64 @f20s64(double %x) #0 {
; X87-LABEL: f20s64:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 24
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: orl $3072, %eax # imm = 0xC00
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: fistpll {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl {{[0-9]+}}(%esp), %edx
; X87-NEXT: addl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f20s64:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 24
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00
; X86-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT: addl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f20s64:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: retq
;
; AVX-LABEL: f20s64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvttsd2si %xmm0, %rax
; AVX-NEXT: retq
entry:
%result = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x,
metadata !"fpexcept.strict") #0
ret i64 %result
}
; Verify that fptosi(%x) isn't simplified when the rounding mode is
; unknown.
; Verify that no gross errors happen.
define i128 @f20s128(double %x) nounwind strictfp {
; X87-LABEL: f20s128:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %edi
; X87-NEXT: pushl %esi
; X87-NEXT: subl $36, %esp
; X87-NEXT: movl {{[0-9]+}}(%esp), %esi
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: calll __fixdfti
; X87-NEXT: subl $4, %esp
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT: movl {{[0-9]+}}(%esp), %edx
; X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X87-NEXT: movl %edi, 8(%esi)
; X87-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. 
With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X87-NEXT: movl %eax, (%esi)
; X87-NEXT: movl %ecx, 4(%esi)
; X87-NEXT: movl %esi, %eax
; X87-NEXT: addl $36, %esp
; X87-NEXT: popl %esi
; X87-NEXT: popl %edi
; X87-NEXT: retl $4
;
; X86-SSE-LABEL: f20s128:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %edi
; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: subl $36, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __fixdfti
; X86-SSE-NEXT: subl $4, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SSE-NEXT: movl %edi, 8(%esi)
; X86-SSE-NEXT: movl %edx, 12(%esi)
[FPEnv][SelectionDAG] Relax chain requirements This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrisincs are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening 3) The SystemZ back-end currently does not allow matching multiply-and- extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. 
With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
2019-12-06 18:02:11 +08:00
; X86-SSE-NEXT: movl %eax, (%esi)
; X86-SSE-NEXT: movl %ecx, 4(%esi)
; X86-SSE-NEXT: movl %esi, %eax
; X86-SSE-NEXT: addl $36, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: popl %edi
; X86-SSE-NEXT: retl $4
;
; SSE-LABEL: f20s128:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: callq __fixdfti
; SSE-NEXT: popq %rcx
; SSE-NEXT: retq
;
; AVX-LABEL: f20s128:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: callq __fixdfti
; AVX-NEXT: popq %rcx
; AVX-NEXT: retq
entry:
%result = call i128 @llvm.experimental.constrained.fptosi.i128.f64(double %x,
metadata !"fpexcept.strict") #0
ret i128 %result
}
; Verify that fptoui(%x) isn't simplified when the rounding mode is
; unknown.
; Verify that no gross errors happen.
; The call below is a strict (fpexcept.strict) double -> i8 fptoui. None of
; the expected sequences converts at 8-bit width. The X87 checks store a
; 16-bit integer (fistps) and reload only its low byte, while the SSE and AVX
; checks convert into %eax with (v)cvttsd2si and return %al.
; NOTE(review): that wider conversion looks like the reason for the FIXME
; below - confirm before relying on it.
; FIXME: The SSE/AVX code does not raise an invalid exception for all values
; that don't fit in i8.
define i8 @f20u8(double %x) #0 {
; X87-LABEL: f20u8:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 12
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: orl $3072, %eax # imm = 0xC00
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: fistps {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: movb {{[0-9]+}}(%esp), %al
; X87-NEXT: addl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f20u8:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: cvttsd2si {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: # kill: def $al killed $al killed $eax
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f20u8:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvttsd2si %xmm0, %eax
; SSE-NEXT: # kill: def $al killed $al killed $eax
; SSE-NEXT: retq
;
; AVX-LABEL: f20u8:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvttsd2si %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
entry:
%result = call i8 @llvm.experimental.constrained.fptoui.i8.f64(double %x,
metadata !"fpexcept.strict") #0
ret i8 %result
}
; Verify that fptoui(%x) isn't simplified when the rounding mode is
; unknown.
; Verify that no gross errors happen.
; The call below is a strict (fpexcept.strict) double -> i16 fptoui. Every
; expected sequence converts at 32-bit width and returns the low half. The
; X87 checks store a 32-bit integer (fistpl) and reload it into %eax, and the
; SSE and AVX checks use (v)cvttsd2si into %eax; in all cases only %ax is
; returned.
; NOTE(review): that wider conversion looks like the reason for the FIXME
; below - confirm before relying on it.
; FIXME: The SSE/AVX code does not raise an invalid exception for all values
; that don't fit in i16.
define i16 @f20u16(double %x) #0 {
; X87-LABEL: f20u16:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 12
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: fnstcw (%esp)
; X87-NEXT: movzwl (%esp), %eax
; X87-NEXT: orl $3072, %eax # imm = 0xC00
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: fistpl {{[0-9]+}}(%esp)
; X87-NEXT: fldcw (%esp)
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: # kill: def $ax killed $ax killed $eax
; X87-NEXT: addl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f20u16:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: cvttsd2si {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f20u16:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvttsd2si %xmm0, %eax
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
; SSE-NEXT: retq
;
; AVX-LABEL: f20u16:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvttsd2si %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
entry:
%result = call i16 @llvm.experimental.constrained.fptoui.i16.f64(double %x,
metadata !"fpexcept.strict") #0
ret i16 %result
}
; Verify that fptoui(%x) isn't simplified when the rounding mode is
; unknown.
; Verify that no gross errors happen.
; The call below is a strict (fpexcept.strict) double -> i32 fptoui. Expected
; lowerings, as pinned by the checks that follow:
;  - X87 stores a 64-bit integer (fistpll) and returns its low 32 bits.
;  - 32-bit SSE2 compares the input against a constant-pool value, subtracts
;    either that value or zero, converts with cvttsd2si and xors bit 31 back
;    in according to the compare result.
;  - 64-bit SSE and AVX1 use the 64-bit form of (v)cvttsd2si and return %eax.
;  - AVX512 uses the unsigned conversion vcvttsd2usi directly.
; FIXME: The X87/SSE/AVX1 code does not raise an invalid exception for all
; values that don't fit in i32. The AVX512 code does.
define i32 @f20u(double %x) #0 {
; X87-LABEL: f20u:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 24
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: orl $3072, %eax # imm = 0xC00
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: fistpll {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: addl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f20u:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; X86-SSE-NEXT: comisd %xmm0, %xmm2
; X86-SSE-NEXT: xorpd %xmm1, %xmm1
; X86-SSE-NEXT: ja .LBB24_2
; X86-SSE-NEXT: # %bb.1: # %entry
; X86-SSE-NEXT: movapd %xmm2, %xmm1
; X86-SSE-NEXT: .LBB24_2: # %entry
; X86-SSE-NEXT: setbe %al
; X86-SSE-NEXT: movzbl %al, %ecx
; X86-SSE-NEXT: shll $31, %ecx
; X86-SSE-NEXT: subsd %xmm1, %xmm0
; X86-SSE-NEXT: cvttsd2si %xmm0, %eax
; X86-SSE-NEXT: xorl %ecx, %eax
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f20u:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: # kill: def $eax killed $eax killed $rax
; SSE-NEXT: retq
;
; AVX1-LABEL: f20u:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vcvttsd2si %xmm0, %rax
; AVX1-NEXT: # kill: def $eax killed $eax killed $rax
; AVX1-NEXT: retq
;
; AVX512-LABEL: f20u:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvttsd2usi %xmm0, %eax
; AVX512-NEXT: retq
entry:
%result = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x,
metadata !"fpexcept.strict") #0
ret i32 %result
}
; Verify that fptoui(%x) isn't simplified when the rounding mode is
; unknown.
; Verify that no gross errors happen.
; The call below is a strict (fpexcept.strict) double -> i64 fptoui. Expected
; lowerings, as pinned by the checks that follow:
;  - X87 and 32-bit SSE2 compare the input against a constant-pool value,
;    subtract either that value or zero, store a 64-bit integer with fistpll
;    and xor bit 31 of the high result word (returned in %edx) according to
;    the compare result.
;  - 64-bit SSE and AVX1 do the same compare/subtract, convert with the
;    64-bit (v)cvttsd2si and xor bit 63 back in.
;  - AVX512 uses the unsigned conversion vcvttsd2usi directly.
; NOTE(review): the constant-pool operand is presumably 2^63; its value is not
; visible in these checks.
; FIXME: This code generates spurious inexact exceptions.
define i64 @f20u64(double %x) #0 {
; X87-LABEL: f20u64:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 24
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: wait
; X87-NEXT: xorl %edx, %edx
; X87-NEXT: fcomi %st(1), %st
; X87-NEXT: wait
; X87-NEXT: setbe %dl
; X87-NEXT: fldz
; X87-NEXT: fxch %st(1)
; X87-NEXT: fcmovnbe %st(1), %st
; X87-NEXT: fstp %st(1)
; X87-NEXT: fsubrp %st, %st(1)
; X87-NEXT: wait
; X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: orl $3072, %eax # imm = 0xC00
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: fistpll {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: shll $31, %edx
; X87-NEXT: xorl {{[0-9]+}}(%esp), %edx
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: addl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f20u64:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 24
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; X86-SSE-NEXT: comisd %xmm0, %xmm2
; X86-SSE-NEXT: xorpd %xmm1, %xmm1
; X86-SSE-NEXT: ja .LBB25_2
; X86-SSE-NEXT: # %bb.1: # %entry
; X86-SSE-NEXT: movapd %xmm2, %xmm1
; X86-SSE-NEXT: .LBB25_2: # %entry
; X86-SSE-NEXT: subsd %xmm1, %xmm0
; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: setbe %al
; X86-SSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: orl $3072, %ecx # imm = 0xC00
; X86-SSE-NEXT: movw %cx, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzbl %al, %edx
; X86-SSE-NEXT: shll $31, %edx
; X86-SSE-NEXT: xorl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: addl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f20u64:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT: comisd %xmm2, %xmm0
; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: jb .LBB25_2
; SSE-NEXT: # %bb.1: # %entry
; SSE-NEXT: movapd %xmm2, %xmm1
; SSE-NEXT: .LBB25_2: # %entry
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: cvttsd2si %xmm0, %rcx
; SSE-NEXT: setae %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: shlq $63, %rax
; SSE-NEXT: xorq %rcx, %rax
; SSE-NEXT: retq
;
; AVX1-LABEL: f20u64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vcomisd %xmm1, %xmm0
; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: jb .LBB25_2
; AVX1-NEXT: # %bb.1: # %entry
; AVX1-NEXT: vmovapd %xmm1, %xmm2
; AVX1-NEXT: .LBB25_2: # %entry
; AVX1-NEXT: vsubsd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
; AVX1-NEXT: setae %al
; AVX1-NEXT: movzbl %al, %eax
; AVX1-NEXT: shlq $63, %rax
; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: retq
;
; AVX512-LABEL: f20u64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvttsd2usi %xmm0, %rax
; AVX512-NEXT: retq
entry:
%result = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x,
metadata !"fpexcept.strict") #0
ret i64 %result
}
; Verify that fptoui(%x) isn't simplified when the rounding mode is
; unknown.
; Verify that no gross errors happen.
; The call below is a strict (fpexcept.strict) double -> i128 fptoui. Every
; run line is expected to lower it to a call to the __fixunsdfti libcall. On
; the 32-bit targets the four result words are copied to the memory addressed
; by %esi, that pointer is returned in %eax, and the function returns with
; `retl $4`.
; This function spells out `nounwind strictfp` rather than using #0, and its
; checks contain no .cfi directives.
; NOTE(review): presumably nounwind is what keeps the CFI out of the expected
; output - confirm.
define i128 @f20u128(double %x) nounwind strictfp {
; X87-LABEL: f20u128:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %edi
; X87-NEXT: pushl %esi
; X87-NEXT: subl $36, %esp
; X87-NEXT: movl {{[0-9]+}}(%esp), %esi
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: leal {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: calll __fixunsdfti
; X87-NEXT: subl $4, %esp
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT: movl {{[0-9]+}}(%esp), %edx
; X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X87-NEXT: movl %edi, 8(%esi)
; X87-NEXT: movl %edx, 12(%esi)
; X87-NEXT: movl %eax, (%esi)
; X87-NEXT: movl %ecx, 4(%esi)
; X87-NEXT: movl %esi, %eax
; X87-NEXT: addl $36, %esp
; X87-NEXT: popl %esi
; X87-NEXT: popl %edi
; X87-NEXT: retl $4
;
; X86-SSE-LABEL: f20u128:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %edi
; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: subl $36, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __fixunsdfti
; X86-SSE-NEXT: subl $4, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SSE-NEXT: movl %edi, 8(%esi)
; X86-SSE-NEXT: movl %edx, 12(%esi)
; X86-SSE-NEXT: movl %eax, (%esi)
; X86-SSE-NEXT: movl %ecx, 4(%esi)
; X86-SSE-NEXT: movl %esi, %eax
; X86-SSE-NEXT: addl $36, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: popl %edi
; X86-SSE-NEXT: retl $4
;
; SSE-LABEL: f20u128:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: callq __fixunsdfti
; SSE-NEXT: popq %rcx
; SSE-NEXT: retq
;
; AVX-LABEL: f20u128:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: callq __fixunsdfti
; AVX-NEXT: popq %rcx
; AVX-NEXT: retq
entry:
%result = call i128 @llvm.experimental.constrained.fptoui.i128.f64(double %x,
metadata !"fpexcept.strict") #0
ret i128 %result
}
; Verify that rounding the double constant 42.1 to float (a constrained
; fptrunc with round.dynamic and fpexcept.strict) isn't simplified when the
; rounding mode is unknown.
; Verify that no gross errors happen.
; Every run line is expected to load the double constant and narrow it at run
; time: X87 stores it as a single (fstps) and reloads it, while SSE and AVX
; use (v)cvtsd2ss.
define float @f21() #0 {
; X87-LABEL: f21:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: fldl {{\.LCPI.*}}
; X87-NEXT: fstps (%esp)
; X87-NEXT: flds (%esp)
; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f21:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: cvtsd2ss %xmm0, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f21:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: cvtsd2ss %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: f21:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.fptrunc.f32.f64(
double 42.1,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
; f22 - strict fpext of a float argument to double (the intrinsic takes only an
; exception-behavior operand). Expected lowering is cvtss2sd / vcvtss2sd with
; SSE or AVX; the x87 configuration simply loads the float with flds.
define double @f22(float %x) #0 {
; X87-LABEL: f22:
; X87: # %bb.0: # %entry
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: retl
;
; X86-SSE-LABEL: f22:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: cvtss2sd %xmm0, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f22:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvtss2sd %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: f22:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.fpext.f64.f32(float %x,
metadata !"fpexcept.strict") #0
ret double %result
}
; f23 - strict lrint of a double to i32 with dynamic rounding. Every
; configuration is expected to lower the intrinsic to a call to the lrint
; library function.
define i32 @f23(double %x) #0 {
; X87-LABEL: f23:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl (%esp)
; X87-NEXT: wait
; X87-NEXT: calll lrint
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f23:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: calll lrint
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f23:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: callq lrint
; SSE-NEXT: popq %rcx
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f23:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: callq lrint
; AVX-NEXT: popq %rcx
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
%result = call i32 @llvm.experimental.constrained.lrint.i32.f64(double %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret i32 %result
}
; f24 - strict lrint of a float to i32 with dynamic rounding. Every
; configuration is expected to lower the intrinsic to a call to lrintf.
define i32 @f24(float %x) #0 {
; X87-LABEL: f24:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fstps (%esp)
; X87-NEXT: wait
; X87-NEXT: calll lrintf
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f24:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: calll lrintf
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f24:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: callq lrintf
; SSE-NEXT: popq %rcx
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f24:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: callq lrintf
; AVX-NEXT: popq %rcx
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
%result = call i32 @llvm.experimental.constrained.lrint.i32.f32(float %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret i32 %result
}
; f25 - strict llrint of a double to i64 with dynamic rounding. Every
; configuration is expected to lower the intrinsic to a call to llrint.
define i64 @f25(double %x) #0 {
; X87-LABEL: f25:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl (%esp)
; X87-NEXT: wait
; X87-NEXT: calll llrint
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f25:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: calll llrint
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f25:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: callq llrint
; SSE-NEXT: popq %rcx
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f25:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: callq llrint
; AVX-NEXT: popq %rcx
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
%result = call i64 @llvm.experimental.constrained.llrint.i64.f64(double %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret i64 %result
}
; f26 - strict llrint of a float to i64 with dynamic rounding. Every
; configuration is expected to lower the intrinsic to a call to llrintf.
; The definition carries #0 (strictfp) like every other test here, so the x87
; sequence issues a wait after spilling the argument and before the call.
define i64 @f26(float %x) #0 {
; X87-LABEL: f26:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fstps (%esp)
; X87-NEXT: wait
; X87-NEXT: calll llrintf
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f26:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: calll llrintf
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f26:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: callq llrintf
; SSE-NEXT: popq %rcx
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f26:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: callq llrintf
; AVX-NEXT: popq %rcx
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
%result = call i64 @llvm.experimental.constrained.llrint.i64.f32(float %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret i64 %result
}
; f27 - strict lround of a double to i32 (the intrinsic takes only an
; exception-behavior operand). Every configuration is expected to call the
; lround library function.
define i32 @f27(double %x) #0 {
; X87-LABEL: f27:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl (%esp)
; X87-NEXT: wait
; X87-NEXT: calll lround
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f27:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: calll lround
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f27:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: callq lround
; SSE-NEXT: popq %rcx
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f27:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: callq lround
; AVX-NEXT: popq %rcx
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
%result = call i32 @llvm.experimental.constrained.lround.i32.f64(double %x,
metadata !"fpexcept.strict") #0
ret i32 %result
}
; f28 - strict lround of a float to i32 (exception-behavior operand only).
; Every configuration is expected to call the lroundf library function.
define i32 @f28(float %x) #0 {
; X87-LABEL: f28:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fstps (%esp)
; X87-NEXT: wait
; X87-NEXT: calll lroundf
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f28:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: calll lroundf
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f28:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: callq lroundf
; SSE-NEXT: popq %rcx
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f28:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: callq lroundf
; AVX-NEXT: popq %rcx
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
%result = call i32 @llvm.experimental.constrained.lround.i32.f32(float %x,
metadata !"fpexcept.strict") #0
ret i32 %result
}
; f29 - strict llround of a double to i64 (exception-behavior operand only).
; Every configuration is expected to call the llround library function.
define i64 @f29(double %x) #0 {
; X87-LABEL: f29:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl (%esp)
; X87-NEXT: wait
; X87-NEXT: calll llround
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f29:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: calll llround
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f29:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: callq llround
; SSE-NEXT: popq %rcx
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f29:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: callq llround
; AVX-NEXT: popq %rcx
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
%result = call i64 @llvm.experimental.constrained.llround.i64.f64(double %x,
metadata !"fpexcept.strict") #0
ret i64 %result
}
; f30 - strict llround of a float to i64 (exception-behavior operand only).
; Every configuration is expected to call the llroundf library function.
define i64 @f30(float %x) #0 {
; X87-LABEL: f30:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fstps (%esp)
; X87-NEXT: wait
; X87-NEXT: calll llroundf
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: f30:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: calll llroundf
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: f30:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %rax
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: callq llroundf
; SSE-NEXT: popq %rcx
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: f30:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: callq llroundf
; AVX-NEXT: popq %rcx
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
%result = call i64 @llvm.experimental.constrained.llround.i64.f32(float %x,
metadata !"fpexcept.strict") #0
ret i64 %result
}
; Verify that sitofp(%x) isn't simplified when the rounding mode is
; unknown.
; Verify that no gross errors happen.
; sifdb - strict sitofp of an i8 to double with dynamic rounding. The byte is
; sign-extended with movsbl before the conversion (16-bit filds on x87,
; cvtsi2sd / vcvtsi2sd with SSE or AVX).
define double @sifdb(i8 %x) #0 {
; X87-LABEL: sifdb:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: sifdb:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: sifdb:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movsbl %dil, %eax
; SSE-NEXT: cvtsi2sd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sifdb:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movsbl %dil, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.sitofp.f64.i8(i8 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; sifdw - strict sitofp of an i16 to double with dynamic rounding. On x87 the
; 16-bit value is stored and converted directly with filds; the SSE and AVX
; configurations sign-extend with movswl and then use cvtsi2sd / vcvtsi2sd.
define double @sifdw(i16 %x) #0 {
; X87-LABEL: sifdw:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: sifdw:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: sifdw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movswl %di, %eax
; SSE-NEXT: cvtsi2sd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sifdw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movswl %di, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.sitofp.f64.i16(i16 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; sifdi - strict sitofp of an i32 to double with dynamic rounding. Expected
; lowering is a 32-bit fildl on x87 and cvtsi2sd / vcvtsi2sd elsewhere (the
; 32-bit SSE2 configuration converts straight from the stack argument).
define double @sifdi(i32 %x) #0 {
; X87-LABEL: sifdi:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: sifdi:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: sifdi:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvtsi2sd %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sifdi:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; siffb - strict sitofp of an i8 to float with dynamic rounding. The byte is
; sign-extended with movsbl before the conversion (16-bit filds on x87,
; cvtsi2ss / vcvtsi2ss with SSE or AVX).
define float @siffb(i8 %x) #0 {
; X87-LABEL: siffb:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: siffb:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: siffb:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movsbl %dil, %eax
; SSE-NEXT: cvtsi2ss %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: siffb:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movsbl %dil, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.sitofp.f32.i8(i8 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
; siffw - strict sitofp of an i16 to float with dynamic rounding. On x87 the
; 16-bit value is stored and converted directly with filds; the SSE and AVX
; configurations sign-extend with movswl and then use cvtsi2ss / vcvtsi2ss.
define float @siffw(i16 %x) #0 {
; X87-LABEL: siffw:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: siffw:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: siffw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movswl %di, %eax
; SSE-NEXT: cvtsi2ss %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: siffw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movswl %di, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.sitofp.f32.i16(i16 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
; siffi - strict sitofp of an i32 to float with dynamic rounding. Expected
; lowering is a 32-bit fildl on x87 and cvtsi2ss / vcvtsi2ss elsewhere (the
; 32-bit SSE2 configuration converts straight from the stack argument).
define float @siffi(i32 %x) #0 {
; X87-LABEL: siffi:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: siffi:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: siffi:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvtsi2ss %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: siffi:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
; sifdl - strict sitofp of an i64 to double with dynamic rounding. Both 32-bit
; configurations convert with a 64-bit fildll (the SSE2 one additionally
; stores and reloads the result as a double); the 64-bit configurations use
; cvtsi2sd / vcvtsi2sd on %rdi.
define double @sifdl(i64 %x) #0 {
; X87-LABEL: sifdl:
; X87: # %bb.0: # %entry
; X87-NEXT: fildll {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: retl
;
; X86-SSE-LABEL: sifdl:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fstpl (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: sifdl:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvtsi2sd %rdi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sifdl:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; siffl - strict sitofp of an i64 to float with dynamic rounding. Both 32-bit
; configurations convert with a 64-bit fildll (the SSE2 one additionally
; stores and reloads the result as a float); the 64-bit configurations use
; cvtsi2ss / vcvtsi2ss on %rdi.
define float @siffl(i64 %x) #0 {
; X87-LABEL: siffl:
; X87: # %bb.0: # %entry
; X87-NEXT: fildll {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: retl
;
; X86-SSE-LABEL: siffl:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fstps (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: siffl:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvtsi2ss %rdi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: siffl:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
; Verify that uitofp(%x) isn't simplified when the rounding mode is
; unknown.
; Verify that no gross errors happen.
; uifdb - strict uitofp of an i8 to double with dynamic rounding. The byte is
; zero-extended with movzbl and then converted with a signed instruction
; (16-bit filds on x87, cvtsi2sd / vcvtsi2sd with SSE or AVX).
define double @uifdb(i8 %x) #0 {
; X87-LABEL: uifdb:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uifdb:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uifdb:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movzbl %dil, %eax
; SSE-NEXT: cvtsi2sd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: uifdb:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movzbl %dil, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.uitofp.f64.i8(i8 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; uifdw - strict uitofp of an i16 to double with dynamic rounding. The value
; is zero-extended to 32 bits with movzwl and then converted with a signed
; 32-bit instruction (fildl on x87, cvtsi2sd / vcvtsi2sd with SSE or AVX).
define double @uifdw(i16 %x) #0 {
; X87-LABEL: uifdw:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uifdw:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uifdw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: cvtsi2sd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: uifdw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.uitofp.f64.i16(i16 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; uifdi - strict uitofp of an i32 to double with dynamic rounding. Expected
; lowering per configuration:
;  - x87 widens the value to 64 bits on the stack (high half stored as 0) and
;    converts it with fildll;
;  - 32-bit SSE2 uses an orpd / subsd sequence against a loaded constant;
;  - 64-bit SSE and AVX1 zero-extend via a 32-bit movl and use the 64-bit
;    signed cvtsi2sd / vcvtsi2sd;
;  - AVX-512 targets use the unsigned vcvtusi2sd directly.
define double @uifdi(i32 %x) #0 {
; X87-LABEL: uifdi:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: movl $0, {{[0-9]+}}(%esp)
; X87-NEXT: fildll (%esp)
; X87-NEXT: wait
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uifdi:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: orpd %xmm0, %xmm1
; X86-SSE-NEXT: subsd %xmm0, %xmm1
; X86-SSE-NEXT: movsd %xmm1, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uifdi:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movl %edi, %eax
; SSE-NEXT: cvtsi2sd %rax, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: uifdi:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: uifdi:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
; AVX512-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; uifdl - strict uitofp of an i64 to double with dynamic rounding. Expected
; lowering per configuration:
;  - x87 converts with a signed fildll, then adds a constant-pool entry
;    selected by the input's top bit (shrl $31 of the high word is the index);
;  - the SSE2 and AVX1 configurations use an unpack / subpd / addpd sequence
;    against constant-pool values;
;  - AVX-512 targets use the unsigned vcvtusi2sd on %rdi directly.
define double @uifdl(i64 %x) #0 {
; X87-LABEL: uifdl:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 24
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: shrl $31, %ecx
; X87-NEXT: fildll (%esp)
; X87-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: addl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uifdl:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; X86-SSE-NEXT: subpd {{\.LCPI.*}}, %xmm0
; X86-SSE-NEXT: movapd %xmm0, %xmm1
; X86-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; X86-SSE-NEXT: addpd %xmm0, %xmm1
; X86-SSE-NEXT: movlpd %xmm1, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uifdl:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movq %rdi, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
; SSE-NEXT: subpd {{.*}}(%rip), %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: addpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: uifdl:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: uifdl:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0
; AVX512-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; uiffb - strict uitofp of an i8 to float with dynamic rounding. The byte is
; zero-extended with movzbl and then converted with a signed instruction
; (16-bit filds on x87, cvtsi2ss / vcvtsi2ss with SSE or AVX).
define float @uiffb(i8 %x) #0 {
; X87-LABEL: uiffb:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uiffb:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uiffb:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movzbl %dil, %eax
; SSE-NEXT: cvtsi2ss %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: uiffb:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movzbl %dil, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.uitofp.f32.i8(i8 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
; uiffw - strict uitofp of an i16 to float with dynamic rounding. The value is
; zero-extended to 32 bits with movzwl and then converted with a signed
; 32-bit instruction (fildl on x87, cvtsi2ss / vcvtsi2ss with SSE or AVX).
define float @uiffw(i16 %x) #0 {
; X87-LABEL: uiffw:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
; X87-NEXT: wait
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uiffw:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uiffw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: cvtsi2ss %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: uiffw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.uitofp.f32.i16(i16 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
; uiffi - strict uitofp of an i32 to float with dynamic rounding. Expected
; lowering per configuration:
;  - x87 widens the value to 64 bits on the stack (high half stored as 0) and
;    converts it with fildll;
;  - 32-bit SSE2 builds a double with an orpd / subsd sequence and narrows it
;    with cvtsd2ss;
;  - 64-bit SSE and AVX1 zero-extend via a 32-bit movl and use the 64-bit
;    signed cvtsi2ss / vcvtsi2ss;
;  - AVX-512 targets use the unsigned vcvtusi2ss directly.
define float @uiffi(i32 %x) #0 {
; X87-LABEL: uiffi:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: movl $0, {{[0-9]+}}(%esp)
; X87-NEXT: fildll (%esp)
; X87-NEXT: wait
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uiffi:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: orpd %xmm0, %xmm1
; X86-SSE-NEXT: subsd %xmm0, %xmm1
; X86-SSE-NEXT: xorps %xmm0, %xmm0
; X86-SSE-NEXT: cvtsd2ss %xmm1, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uiffi:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movl %edi, %eax
; SSE-NEXT: cvtsi2ss %rax, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: uiffi:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: uiffi:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
; AVX512-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
; uiffl - strict uitofp of an i64 to float with dynamic rounding. Expected
; lowering per configuration:
;  - both 32-bit configurations convert with a signed fildll and then add a
;    constant-pool entry selected by the input's top bit (shrl $31 index);
;  - 64-bit SSE and AVX1 halve inputs whose top bit is set (shift right by
;    one, OR-ing the dropped low bit back in), convert with the signed
;    cvtsi2ss / vcvtsi2ss, and double the result on that path (the jns branch
;    skips the doubling otherwise);
;  - AVX-512 targets use the unsigned vcvtusi2ss on %rdi directly.
define float @uiffl(i64 %x) #0 {
; X87-LABEL: uiffl:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 24
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X87-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X87-NEXT: shrl $31, %ecx
; X87-NEXT: fildll {{[0-9]+}}(%esp)
; X87-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
; X87-NEXT: fstps {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: wait
; X87-NEXT: addl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uiffl:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 24
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: shrl $31, %eax
; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: wait
; X86-SSE-NEXT: addl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uiffl:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: shrq %rax
; SSE-NEXT: movl %edi, %ecx
; SSE-NEXT: andl $1, %ecx
; SSE-NEXT: orq %rax, %rcx
; SSE-NEXT: testq %rdi, %rdi
; SSE-NEXT: cmovnsq %rdi, %rcx
; SSE-NEXT: cvtsi2ss %rcx, %xmm0
; SSE-NEXT: jns .LBB52_2
; SSE-NEXT: # %bb.1:
; SSE-NEXT: addss %xmm0, %xmm0
; SSE-NEXT: .LBB52_2: # %entry
; SSE-NEXT: retq
;
; AVX1-LABEL: uiffl:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: movq %rdi, %rax
; AVX1-NEXT: shrq %rax
; AVX1-NEXT: movl %edi, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: orq %rax, %rcx
; AVX1-NEXT: testq %rdi, %rdi
; AVX1-NEXT: cmovnsq %rdi, %rcx
; AVX1-NEXT: vcvtsi2ss %rcx, %xmm0, %xmm0
; AVX1-NEXT: jns .LBB52_2
; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
; AVX1-NEXT: .LBB52_2: # %entry
; AVX1-NEXT: retq
;
; AVX512-LABEL: uiffl:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0
; AVX512-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
attributes #0 = { strictfp }
; Thread-local, zero-initialized i8 global placed in the "llvm.metadata"
; section.
; NOTE(review): no reference to this global is visible in this part of the
; test -- confirm whether anything still depends on it.
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
; Declarations of the constrained f64 arithmetic and math intrinsics.
; Each takes its floating-point operand(s) followed by two metadata operands;
; per the LLVM LangRef these are the rounding mode and the exception behavior
; (written at call sites as e.g. `metadata !"round.dynamic"` and
; `metadata !"fpexcept.strict"`).
;
; Binary arithmetic on two doubles.
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.frem.f64(double, double, metadata, metadata)
; Math-library style operations. Note that powi takes its second operand as
; an i32 rather than a double.
declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, metadata)
declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.exp.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.exp2.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.log10.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadata)
; Rounding to an integral value that is still returned as a double.
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
; Constrained conversions from double to integer, declared for result widths
; i8, i16, i32, i64 and i128. These take a single metadata operand (per the
; LLVM LangRef, the exception behavior); there is no rounding-mode operand.
;
; Signed results.
declare i8 @llvm.experimental.constrained.fptosi.i8.f64(double, metadata)
declare i16 @llvm.experimental.constrained.fptosi.i16.f64(double, metadata)
declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata)
declare i128 @llvm.experimental.constrained.fptosi.i128.f64(double, metadata)
; Unsigned results.
declare i8 @llvm.experimental.constrained.fptoui.i8.f64(double, metadata)
declare i16 @llvm.experimental.constrained.fptoui.i16.f64(double, metadata)
declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata)
declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata)
declare i128 @llvm.experimental.constrained.fptoui.i128.f64(double, metadata)
; Constrained conversions between floating-point widths.
; fptrunc (double -> float) takes two metadata operands, while fpext
; (float -> double) takes only one; per the LLVM LangRef the extra operand on
; fptrunc is the rounding mode, which a widening conversion does not need.
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
; Constrained round-to-integer intrinsics, declared for both double and float
; inputs. The l* forms are declared here with an i32 result and the ll* forms
; with an i64 result.
;
; lrint/llrint take two metadata operands (per the LLVM LangRef, rounding mode
; and exception behavior).
declare i32 @llvm.experimental.constrained.lrint.i32.f64(double, metadata, metadata)
declare i32 @llvm.experimental.constrained.lrint.i32.f32(float, metadata, metadata)
declare i64 @llvm.experimental.constrained.llrint.i64.f64(double, metadata, metadata)
declare i64 @llvm.experimental.constrained.llrint.i64.f32(float, metadata, metadata)
; lround/llround take a single metadata operand (per the LLVM LangRef, the
; exception behavior only).
declare i32 @llvm.experimental.constrained.lround.i32.f64(double, metadata)
declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata)
declare i64 @llvm.experimental.constrained.llround.i64.f64(double, metadata)
declare i64 @llvm.experimental.constrained.llround.i64.f32(float, metadata)
; Constrained integer-to-floating-point conversions, declared for i8, i16,
; i32 and i64 sources and for both double and float results. Each takes two
; metadata operands; the uitofp.f32.i64 call in this file passes
; `metadata !"round.dynamic"` and `metadata !"fpexcept.strict"` for them.
;
; Signed source -> double.
declare double @llvm.experimental.constrained.sitofp.f64.i8(i8, metadata, metadata)
declare double @llvm.experimental.constrained.sitofp.f64.i16(i16, metadata, metadata)
declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata)
; Signed source -> float.
declare float @llvm.experimental.constrained.sitofp.f32.i8(i8, metadata, metadata)
declare float @llvm.experimental.constrained.sitofp.f32.i16(i16, metadata, metadata)
declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata)
declare float @llvm.experimental.constrained.sitofp.f32.i64(i64, metadata, metadata)
; Unsigned source -> double.
declare double @llvm.experimental.constrained.uitofp.f64.i8(i8, metadata, metadata)
declare double @llvm.experimental.constrained.uitofp.f64.i16(i16, metadata, metadata)
declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata)
; Unsigned source -> float.
declare float @llvm.experimental.constrained.uitofp.f32.i8(i8, metadata, metadata)
declare float @llvm.experimental.constrained.uitofp.f32.i16(i16, metadata, metadata)
declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata)
declare float @llvm.experimental.constrained.uitofp.f32.i64(i64, metadata, metadata)